[llvm] [LLVM][IR] Use splat syntax when printing ConstantDataVector. (PR #112548)

Wed Oct 16 07:05:31 PDT 2024

https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/112548

Whilst this PR is not finished (there are many tests that still need updating) I'm sending for review because there's no value in me continuing if the goal of the PR is not acceptable. Likewise, if there's a better way to navigate the transition then I'm all ears.

I'm thinking that sooner or later the pain of having to update the tests will be required so I figured it was worth getting in early as an NFC patch rather than attempting this when trying to make the vector ConstantInt/ConstantFP support enabled by default.  Doing this early will also make it easier to track the cases where code quality diverges so I can circle back to them once the functional issues have been resolved..

>From bee588e3a9a0aa04bfeb6e7dfe0f3bff55541128 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Wed, 9 Oct 2024 17:03:38 +0100
Subject: [PATCH] [LLVM][IR] Use splat syntax when printing ConstantDataVector.

---
 llvm/lib/IR/AsmWriter.cpp                     |   45 +
 llvm/lib/IR/Constants.cpp                     |    6 +-
 .../Analysis/CostModel/AArch64/arith-fp.ll    |   14 +-
 .../CostModel/AArch64/arith-widening.ll       |    6 +-
 llvm/test/Analysis/CostModel/AArch64/div.ll   |  144 +-
 .../Analysis/CostModel/AArch64/div_cte.ll     |   12 +-
 llvm/test/Analysis/CostModel/AArch64/fshl.ll  |    8 +-
 llvm/test/Analysis/CostModel/AArch64/fshr.ll  |    8 +-
 llvm/test/Analysis/CostModel/AArch64/rem.ll   |  144 +-
 llvm/test/Analysis/CostModel/AMDGPU/div.ll    |  432 +++---
 llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll   |  384 ++---
 llvm/test/Analysis/CostModel/AMDGPU/fneg.ll   |   24 +-
 llvm/test/Analysis/CostModel/AMDGPU/mul.ll    |  192 +--
 llvm/test/Analysis/CostModel/AMDGPU/rem.ll    |  432 +++---
 llvm/test/Analysis/CostModel/ARM/divrem.ll    |  820 +++++-----
 .../Analysis/CostModel/RISCV/arith-int.ll     |   12 +-
 .../CostModel/RISCV/rvv-load-store.ll         |   12 +-
 .../Analysis/CostModel/RISCV/rvv-phi-const.ll |   28 +-
 .../Analysis/CostModel/RISCV/rvv-select.ll    |    2 +-
 .../CostModel/X86/arith-fp-codesize.ll        |   48 +-
 .../CostModel/X86/arith-fp-latency.ll         |   96 +-
 .../CostModel/X86/arith-fp-sizelatency.ll     |   84 +-
 llvm/test/Analysis/CostModel/X86/arith-fp.ll  |   96 +-
 .../Analysis/CostModel/X86/div-codesize.ll    |  336 ++--
 .../Analysis/CostModel/X86/div-latency.ll     |  384 ++---
 .../Analysis/CostModel/X86/div-sizelatency.ll |  336 ++--
 llvm/test/Analysis/CostModel/X86/div.ll       |  768 ++++-----
 .../Analysis/CostModel/X86/fshl-codesize.ll   |  504 +++---
 .../Analysis/CostModel/X86/fshl-latency.ll    |  486 +++---
 .../CostModel/X86/fshl-sizelatency.ll         |  552 +++----
 llvm/test/Analysis/CostModel/X86/fshl.ll      |  486 +++---
 .../Analysis/CostModel/X86/fshr-codesize.ll   |  504 +++---
 .../Analysis/CostModel/X86/fshr-latency.ll    |  486 +++---
 .../CostModel/X86/fshr-sizelatency.ll         |  552 +++----
 llvm/test/Analysis/CostModel/X86/fshr.ll      |  486 +++---
 .../Analysis/CostModel/X86/mul-codesize.ll    |  384 ++---
 .../Analysis/CostModel/X86/mul-latency.ll     |  384 ++---
 .../Analysis/CostModel/X86/mul-sizelatency.ll |  384 ++---
 llvm/test/Analysis/CostModel/X86/mul.ll       |  384 ++---
 .../Analysis/CostModel/X86/rem-codesize.ll    |  360 ++---
 .../Analysis/CostModel/X86/rem-latency.ll     |  336 ++--
 .../Analysis/CostModel/X86/rem-sizelatency.ll |  336 ++--
 llvm/test/Analysis/CostModel/X86/rem.ll       |  768 ++++-----
 .../Analysis/CostModel/X86/slm-arith-costs.ll |    8 +-
 llvm/test/Analysis/CostModel/X86/vdiv-cost.ll |   42 +-
 .../CostModel/X86/vshift-ashr-codesize.ll     |  154 +-
 .../X86/vshift-ashr-cost-inseltpoison.ll      |  160 +-
 .../CostModel/X86/vshift-ashr-cost.ll         |  160 +-
 .../CostModel/X86/vshift-ashr-latency.ll      |  184 +--
 .../CostModel/X86/vshift-ashr-sizelatency.ll  |  158 +-
 .../CostModel/X86/vshift-lshr-codesize.ll     |  150 +-
 .../X86/vshift-lshr-cost-inseltpoison.ll      |  144 +-
 .../CostModel/X86/vshift-lshr-cost.ll         |  144 +-
 .../CostModel/X86/vshift-lshr-latency.ll      |  164 +-
 .../CostModel/X86/vshift-lshr-sizelatency.ll  |  150 +-
 .../CostModel/X86/vshift-shl-codesize.ll      |  150 +-
 .../X86/vshift-shl-cost-inseltpoison.ll       |  144 +-
 .../Analysis/CostModel/X86/vshift-shl-cost.ll |  144 +-
 .../CostModel/X86/vshift-shl-latency.ll       |  164 +-
 .../CostModel/X86/vshift-shl-sizelatency.ll   |  132 +-
 .../Analysis/ValueTracking/known-non-zero.ll  |    6 +-
 .../knownbits-and-or-xor-lowbit.ll            |   14 +-
 .../ValueTracking/knownbits-bmi-pattern.ll    |   48 +-
 .../ValueTracking/knownbits-x86-hadd-hsub.ll  |    8 +-
 .../Analysis/ValueTracking/knownzero-shift.ll |    4 +-
 .../Analysis/ValueTracking/numsignbits-shl.ll |   12 +-
 llvm/test/Assembler/ConstantExprFold.ll       |    4 +-
 .../AMDGPU/amdgpu-codegenprepare-fdiv.ll      |   18 +-
 .../amdgpu-codegenprepare-i16-to-i32.ll       |    4 +-
 .../AMDGPU/amdgpu-codegenprepare-mul24.ll     |   44 +-
 .../AMDGPU/amdgpu-late-codegenprepare.ll      |    4 +-
 .../AMDGPU/amdgpu-simplify-libcall-pow.ll     |   62 +-
 .../AMDGPU/amdgpu-simplify-libcall-pown.ll    |   34 +-
 .../AMDGPU/amdgpu-simplify-libcall-powr.ll    |   34 +-
 .../AMDGPU/amdgpu-simplify-libcall-rootn.ll   |   34 +-
 .../lower-buffer-fat-pointers-pointer-ops.ll  |   12 +-
 .../AMDGPU/promote-alloca-array-aggregate.ll  |    2 +-
 .../CodeGen/AMDGPU/promote-alloca-memset.ll   |    2 +-
 llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll |    6 +-
 .../Hexagon/autohvx/vector-align-tbaa.ll      |   18 +-
 llvm/test/CodeGen/NVPTX/variadics-lowering.ll |    2 +-
 ...xed-vectors-strided-load-store-negative.ll |   16 +-
 .../rvv/fixed-vectors-strided-load-store.ll   |   22 +-
 .../LowOverheadLoops/tail-pred-const.ll       |  186 ++-
 .../Thumb2/mve-gather-optimisation-deep.ll    |   32 +-
 .../CodeGen/X86/codegen-prepare-extload.ll    |    2 +-
 .../MemorySanitizer/AArch64/arm64-vshift.ll   | 1378 ++++++++---------
 .../MemorySanitizer/AArch64/neon_vst.ll       |  242 +--
 .../MemorySanitizer/X86/avx-intrinsics-x86.ll |   74 +-
 .../X86/avx2-intrinsics-x86.ll                |   78 +-
 .../X86/sse2-intrinsics-x86.ll                |  102 +-
 .../X86/sse41-intrinsics-x86.ll               |   28 +-
 .../MemorySanitizer/masked-store-load.ll      |   36 +-
 .../vector-track-origins-neon.ll              |    2 -
 .../NumericalStabilitySanitizer/basic.ll      |   12 +-
 .../AggressiveInstCombine/ARM/fptosisat.ll    |   20 +-
 .../AggressiveInstCombine/X86/fptosisat.ll    |    4 +-
 .../AggressiveInstCombine/masked-cmp.ll       |   10 +-
 .../AggressiveInstCombine/popcount.ll         |   30 +-
 .../AggressiveInstCombine/trunc_multi_uses.ll |    8 +-
 .../AggressiveInstCombine/vector-or-load.ll   |    2 +-
 .../Transforms/Attributor/nofpclass-powi.ll   |    4 +-
 llvm/test/Transforms/Attributor/nofpclass.ll  |    8 +-
 .../value-simplify-pointer-info-vec.ll        |    4 +-
 llvm/test/Transforms/BDCE/binops-multiuse.ll  |    4 +-
 llvm/test/Transforms/BDCE/dead-uses.ll        |    8 +-
 .../Transforms/BDCE/vectors-inseltpoison.ll   |   30 +-
 llvm/test/Transforms/BDCE/vectors.ll          |   30 +-
 .../CodeGenPrepare/X86/fold-loop-of-urem.ll   |   14 +-
 .../AArch64/large-immediate.ll                |   16 +-
 .../ConstraintElimination/vector-compares.ll  |    2 +-
 .../CorrelatedValuePropagation/icmp.ll        |    4 +-
 .../CorrelatedValuePropagation/overflows.ll   |    4 +-
 .../CorrelatedValuePropagation/vectors.ll     |   22 +-
 .../X86/gather-null-pointer.ll                |    6 +-
 .../offsetted-overlapping-stores.ll           |    2 +-
 .../DivRemPairs/AMDGPU/div-rem-pairs.ll       |    4 +-
 llvm/test/Transforms/EarlyCSE/commute.ll      |    8 +-
 llvm/test/Transforms/EarlyCSE/gep.ll          |    2 +-
 .../GVN/non-integral-pointers-inseltpoison.ll |    4 +-
 .../Transforms/GVN/non-integral-pointers.ll   |    2 +-
 .../AMDGPU/infer-getelementptr.ll             |    2 +-
 .../InstCombine/2007-03-21-SignedRangeTest.ll |    4 +-
 .../InstCombine/2008-01-21-MulTrunc.ll        |    4 +-
 .../InstCombine/2008-07-11-RemAnd.ll          |    2 +-
 .../InstCombine/2008-12-17-SRemNegConstVec.ll |    2 +-
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   |    2 +-
 .../InstCombine/X86/x86-masked-memops.ll      |    4 +-
 .../Transforms/InstCombine/X86/x86-movmsk.ll  |    2 +-
 .../InstCombine/X86/x86-muldq-inseltpoison.ll |   32 +-
 .../Transforms/InstCombine/X86/x86-muldq.ll   |   32 +-
 .../InstCombine/X86/x86-pack-inseltpoison.ll  |   48 +-
 .../Transforms/InstCombine/X86/x86-pack.ll    |   48 +-
 .../Transforms/InstCombine/X86/x86-pmulh.ll   |   12 +-
 .../Transforms/InstCombine/X86/x86-pmulhrs.ll |   12 +-
 .../Transforms/InstCombine/X86/x86-ternlog.ll |   30 +-
 .../X86/x86-vector-shifts-inseltpoison.ll     |  158 +-
 .../InstCombine/X86/x86-vector-shifts.ll      |  158 +-
 .../InstCombine/X86/x86-xop-inseltpoison.ll   |    4 +-
 .../Transforms/InstCombine/X86/x86-xop.ll     |    4 +-
 llvm/test/Transforms/InstCombine/abs-1.ll     |    2 +-
 .../Transforms/InstCombine/abs-intrinsic.ll   |    4 +-
 .../Transforms/InstCombine/add-mask-neg.ll    |   10 +-
 llvm/test/Transforms/InstCombine/add-mask.ll  |    2 +-
 .../InstCombine/add-shl-sdiv-to-srem.ll       |   18 +-
 .../test/Transforms/InstCombine/add-sitofp.ll |    8 +-
 llvm/test/Transforms/InstCombine/add.ll       |   58 +-
 llvm/test/Transforms/InstCombine/add4.ll      |    4 +-
 .../test/Transforms/InstCombine/add_or_sub.ll |    4 +-
 .../InstCombine/addsub-constant-folding.ll    |   54 +-
 .../InstCombine/adjust-for-minmax.ll          |   20 +-
 .../Transforms/InstCombine/and-compare.ll     |    6 +-
 llvm/test/Transforms/InstCombine/and-fcmp.ll  |    2 +-
 .../InstCombine/and-or-icmp-const-icmp.ll     |   18 +-
 .../Transforms/InstCombine/and-or-icmps.ll    |   24 +-
 llvm/test/Transforms/InstCombine/and-or.ll    |   38 +-
 .../test/Transforms/InstCombine/and-xor-or.ll |   46 +-
 llvm/test/Transforms/InstCombine/and.ll       |   88 +-
 llvm/test/Transforms/InstCombine/and2.ll      |    8 +-
 .../Transforms/InstCombine/apint-select.ll    |    2 +-
 .../Transforms/InstCombine/ashr-demand.ll     |    8 +-
 llvm/test/Transforms/InstCombine/ashr-lshr.ll |    2 +-
 llvm/test/Transforms/InstCombine/avg-lsb.ll   |    2 +-
 .../InstCombine/binop-and-shifts.ll           |   28 +-
 .../test/Transforms/InstCombine/binop-cast.ll |    4 +-
 .../InstCombine/binop-of-displaced-shifts.ll  |    2 +-
 .../binop-select-cast-of-select-cond.ll       |    6 +-
 .../Transforms/InstCombine/binop-select.ll    |    6 +-
 .../test/Transforms/InstCombine/bit-checks.ll |    6 +-
 llvm/test/Transforms/InstCombine/bit_ceil.ll  |    6 +-
 llvm/test/Transforms/InstCombine/bit_floor.ll |    6 +-
 .../InstCombine/bitcast-inseltpoison.ll       |    6 +-
 llvm/test/Transforms/InstCombine/bitcast.ll   |    8 +-
 .../test/Transforms/InstCombine/bitreverse.ll |    2 +-
 .../test/Transforms/InstCombine/bswap-fold.ll |   26 +-
 llvm/test/Transforms/InstCombine/bswap.ll     |    4 +-
 .../canonicalize-ashr-shl-to-masking.ll       |   28 +-
 ...etween-negative-and-positive-thresholds.ll |    8 +-
 ...ern-between-zero-and-positive-threshold.ll |    8 +-
 ...nt-low-bit-mask-and-icmp-eq-to-icmp-ule.ll |    4 +-
 ...nt-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll |    2 +-
 ...t-low-bit-mask-and-icmp-sge-to-icmp-sle.ll |    4 +-
 ...t-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll |    2 +-
 ...t-low-bit-mask-and-icmp-sle-to-icmp-sle.ll |    4 +-
 ...t-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll |    2 +-
 ...t-low-bit-mask-and-icmp-uge-to-icmp-ule.ll |    4 +-
 ...t-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll |    2 +-
 ...t-low-bit-mask-and-icmp-ule-to-icmp-ule.ll |    4 +-
 ...t-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll |    2 +-
 .../InstCombine/canonicalize-fcmp-inf.ll      |    2 +-
 ...icalize-lack-of-signed-truncation-check.ll |   10 +-
 ...ze-low-bit-mask-and-icmp-eq-to-icmp-ule.ll |    2 +-
 ...ze-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll |    2 +-
 ...low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll |    4 +-
 ...low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll |    4 +-
 ...low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll |    6 +-
 ...low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll |    6 +-
 .../canonicalize-lshr-shl-to-masking.ll       |   28 +-
 .../canonicalize-shl-lshr-to-masking.ll       |   28 +-
 .../canonicalize-signed-truncation-check.ll   |   10 +-
 .../InstCombine/cast-int-fcmp-eq-0.ll         |    2 +-
 .../InstCombine/cast-int-icmp-eq-0.ll         |    6 +-
 llvm/test/Transforms/InstCombine/cast.ll      |   60 +-
 .../Transforms/InstCombine/clamp-to-minmax.ll |    4 +-
 .../Transforms/InstCombine/cmp-intrinsic.ll   |   36 +-
 .../combine-is.fpclass-and-fcmp.ll            |    2 +-
 .../Transforms/InstCombine/compare-signs.ll   |   14 +-
 .../Transforms/InstCombine/compare-udiv.ll    |   16 +-
 .../InstCombine/consecutive-ptrmask.ll        |    2 +-
 .../InstCombine/copysign-fneg-fabs.ll         |    2 +-
 .../create-class-from-logic-fcmp.ll           |    4 +-
 .../InstCombine/ctlz-cttz-shifts.ll           |    6 +-
 .../test/Transforms/InstCombine/ctpop-cttz.ll |    2 +-
 .../test/Transforms/InstCombine/ctpop-pow2.ll |    6 +-
 llvm/test/Transforms/InstCombine/ctpop.ll     |   24 +-
 .../Transforms/InstCombine/dependent-ivs.ll   |    6 +-
 llvm/test/Transforms/InstCombine/div-shift.ll |    6 +-
 llvm/test/Transforms/InstCombine/div.ll       |   58 +-
 .../Transforms/InstCombine/eq-of-parts.ll     |    8 +-
 llvm/test/Transforms/InstCombine/exact.ll     |   22 +-
 llvm/test/Transforms/InstCombine/exp2-1.ll    |    8 +-
 .../Transforms/InstCombine/exp2-to-ldexp.ll   |    2 +-
 .../Transforms/InstCombine/fabs-as-int.ll     |    2 +-
 .../Transforms/InstCombine/fabs-copysign.ll   |    4 +-
 .../Transforms/InstCombine/fabs-fneg-fold.ll  |    2 +-
 llvm/test/Transforms/InstCombine/fadd.ll      |    2 +-
 llvm/test/Transforms/InstCombine/fast-math.ll |    2 +-
 .../InstCombine/fcmp-range-check-idiom.ll     |    2 +-
 llvm/test/Transforms/InstCombine/fcmp.ll      |    8 +-
 llvm/test/Transforms/InstCombine/fdiv.ll      |   18 +-
 llvm/test/Transforms/InstCombine/fma.ll       |   34 +-
 .../InstCombine/fmul-inseltpoison.ll          |    2 +-
 llvm/test/Transforms/InstCombine/fmul-sqrt.ll |    2 +-
 llvm/test/Transforms/InstCombine/fmul.ll      |   12 +-
 .../Transforms/InstCombine/fneg-as-int.ll     |    2 +-
 .../InstCombine/fneg-fabs-as-int.ll           |    2 +-
 .../InstCombine/fold-bin-operand.ll           |    2 +-
 .../InstCombine/fold-ctpop-of-not.ll          |    4 +-
 .../InstCombine/fold-select-fmul-if-zero.ll   |    2 +-
 .../InstCombine/fold-select-trunc.ll          |    2 +-
 .../InstCombine/fold-signbit-test-power2.ll   |   16 +-
 .../fold-sub-of-not-to-inc-of-add.ll          |    6 +-
 .../InstCombine/fpclass-check-idioms.ll       |    4 +-
 llvm/test/Transforms/InstCombine/fsh.ll       |    6 +-
 llvm/test/Transforms/InstCombine/funnel.ll    |   10 +-
 .../InstCombine/gep-combine-loop-invariant.ll |   10 +-
 .../Transforms/InstCombine/gep-custom-dl.ll   |    4 +-
 .../test/Transforms/InstCombine/gep-vector.ll |    4 +-
 .../get-lowbitmask-upto-and-including-bit.ll  |   16 +-
 .../Transforms/InstCombine/getelementptr.ll   |    6 +-
 .../high-bit-signmask-with-trunc.ll           |    2 +-
 .../InstCombine/high-bit-signmask.ll          |    2 +-
 ...n-out-of-bias-calculation-with-constant.ll |    2 +-
 .../hoist-negation-out-of-bias-calculation.ll |    4 +-
 .../hoist-not-from-ashr-operand.ll            |    4 +-
 ...hoist-xor-by-constant-from-xor-by-value.ll |    2 +-
 llvm/test/Transforms/InstCombine/icmp-add.ll  |   34 +-
 .../Transforms/InstCombine/icmp-and-shift.ll  |   26 +-
 .../InstCombine/icmp-div-constant.ll          |   10 +-
 .../Transforms/InstCombine/icmp-logical.ll    |   24 +-
 .../Transforms/InstCombine/icmp-mul-and.ll    |    6 +-
 llvm/test/Transforms/InstCombine/icmp-mul.ll  |   18 +-
 .../Transforms/InstCombine/icmp-of-and-x.ll   |    4 +-
 .../Transforms/InstCombine/icmp-of-or-x.ll    |    6 +-
 .../Transforms/InstCombine/icmp-of-xor-x.ll   |   10 +-
 llvm/test/Transforms/InstCombine/icmp-or.ll   |    6 +-
 .../icmp-power2-and-icmp-shifted-mask.ll      |   28 +-
 .../test/Transforms/InstCombine/icmp-range.ll |   16 +-
 .../InstCombine/icmp-shl-1-overflow.ll        |    6 +-
 .../Transforms/InstCombine/icmp-shl-nsw.ll    |    2 +-
 .../Transforms/InstCombine/icmp-shl-nuw.ll    |    8 +-
 llvm/test/Transforms/InstCombine/icmp-shl.ll  |    8 +-
 .../Transforms/InstCombine/icmp-shr-lt-gt.ll  |    2 +-
 llvm/test/Transforms/InstCombine/icmp-shr.ll  |    6 +-
 .../Transforms/InstCombine/icmp-signmask.ll   |    6 +-
 llvm/test/Transforms/InstCombine/icmp-sub.ll  |    6 +-
 .../test/Transforms/InstCombine/icmp-trunc.ll |   22 +-
 .../Transforms/InstCombine/icmp-uadd-sat.ll   |   92 +-
 ...al-to-icmp-eq-of-lshr-val-by-bits-and-0.ll |   10 +-
 ...al-to-icmp-eq-of-lshr-val-by-bits-and-0.ll |    4 +-
 ...al-to-icmp-ne-of-lshr-val-by-bits-and-0.ll |   10 +-
 ...al-to-icmp-ne-of-lshr-val-by-bits-and-0.ll |    4 +-
 .../Transforms/InstCombine/icmp-usub-sat.ll   |   20 +-
 .../InstCombine/icmp-vec-inseltpoison.ll      |   18 +-
 llvm/test/Transforms/InstCombine/icmp-vec.ll  |   18 +-
 .../InstCombine/icmp-xor-signbit.ll           |   10 +-
 llvm/test/Transforms/InstCombine/icmp.ll      |  216 +--
 .../insert-extract-shuffle-inseltpoison.ll    |    2 +-
 .../InstCombine/insert-extract-shuffle.ll     |    2 +-
 .../Transforms/InstCombine/insertelement.ll   |    8 +-
 .../integer-round-up-pow2-alignment.ll        |   24 +-
 .../InstCombine/intrinsic-select.ll           |    2 +-
 .../test/Transforms/InstCombine/intrinsics.ll |   12 +-
 .../test/Transforms/InstCombine/is_fpclass.ll |    8 +-
 llvm/test/Transforms/InstCombine/ispow2.ll    |   30 +-
 .../test/Transforms/InstCombine/known-bits.ll |    8 +-
 llvm/test/Transforms/InstCombine/ldexp-ext.ll |    4 +-
 llvm/test/Transforms/InstCombine/ldexp.ll     |    2 +-
 .../InstCombine/load-store-forward.ll         |    2 +-
 llvm/test/Transforms/InstCombine/log-pow.ll   |    2 +-
 .../logical-select-inseltpoison.ll            |    2 +-
 .../Transforms/InstCombine/logical-select.ll  |    6 +-
 .../Transforms/InstCombine/low-bit-splat.ll   |    2 +-
 .../InstCombine/lshr-and-negC-icmpeq-zero.ll  |    8 +-
 .../lshr-and-signbit-icmpeq-zero.ll           |    8 +-
 .../lshr-trunc-sext-to-ashr-sext.ll           |    8 +-
 llvm/test/Transforms/InstCombine/lshr.ll      |   18 +-
 .../InstCombine/masked-merge-add.ll           |   10 +-
 .../InstCombine/masked-merge-and-of-ors.ll    |    6 +-
 .../Transforms/InstCombine/masked-merge-or.ll |   10 +-
 .../InstCombine/masked-merge-xor.ll           |   10 +-
 .../Transforms/InstCombine/max-of-nots.ll     |    8 +-
 llvm/test/Transforms/InstCombine/maximum.ll   |    2 +-
 llvm/test/Transforms/InstCombine/maxnum.ll    |    2 +-
 .../test/Transforms/InstCombine/merge-icmp.ll |    6 +-
 .../Transforms/InstCombine/min-positive.ll    |    2 +-
 .../Transforms/InstCombine/minmax-fold.ll     |   16 +-
 .../InstCombine/minmax-intrinsics.ll          |   22 +-
 .../Transforms/InstCombine/minmax-of-xor-x.ll |    8 +-
 llvm/test/Transforms/InstCombine/modulo.ll    |   14 +-
 .../InstCombine/mul-inseltpoison.ll           |   24 +-
 .../Transforms/InstCombine/mul-masked-bits.ll |   12 +-
 llvm/test/Transforms/InstCombine/mul.ll       |   28 +-
 llvm/test/Transforms/InstCombine/mul_fold.ll  |    4 +-
 .../Transforms/InstCombine/narrow-math.ll     |   48 +-
 llvm/test/Transforms/InstCombine/narrow.ll    |    2 +-
 .../Transforms/InstCombine/negated-bitmask.ll |   16 +-
 .../Transforms/InstCombine/nested-select.ll   |   14 +-
 llvm/test/Transforms/InstCombine/not.ll       |   14 +-
 .../InstCombine/nsw-inseltpoison.ll           |    4 +-
 llvm/test/Transforms/InstCombine/nsw.ll       |    4 +-
 ...of-two-or-zero-when-comparing-with-zero.ll |    4 +-
 .../Transforms/InstCombine/onehot_merge.ll    |   32 +-
 .../InstCombine/operand-complexity.ll         |    2 +-
 llvm/test/Transforms/InstCombine/or-concat.ll |    8 +-
 llvm/test/Transforms/InstCombine/or.ll        |   38 +-
 .../Transforms/InstCombine/overflow-mul.ll    |    2 +-
 ...nput-masking-after-truncation-variant-a.ll |   16 +-
 ...nput-masking-after-truncation-variant-b.ll |   16 +-
 ...nput-masking-after-truncation-variant-c.ll |    6 +-
 ...nput-masking-after-truncation-variant-d.ll |   12 +-
 ...nput-masking-after-truncation-variant-e.ll |    4 +-
 ...dant-left-shift-input-masking-variant-a.ll |   16 +-
 ...dant-left-shift-input-masking-variant-b.ll |   16 +-
 ...dant-left-shift-input-masking-variant-c.ll |    8 +-
 ...dant-left-shift-input-masking-variant-d.ll |   14 +-
 ...dant-left-shift-input-masking-variant-e.ll |    4 +-
 llvm/test/Transforms/InstCombine/pow-1.ll     |   54 +-
 llvm/test/Transforms/InstCombine/pow-sqrt.ll  |   16 +-
 .../Transforms/InstCombine/pow-to-ldexp.ll    |   26 +-
 .../test/Transforms/InstCombine/pow_fp_int.ll |    2 +-
 llvm/test/Transforms/InstCombine/pr14365.ll   |    6 +-
 llvm/test/Transforms/InstCombine/pr17827.ll   |    8 +-
 llvm/test/Transforms/InstCombine/pr53357.ll   |    4 +-
 llvm/test/Transforms/InstCombine/ptrmask.ll   |    8 +-
 ...nput-masking-after-truncation-variant-a.ll |   12 +-
 ...nput-masking-after-truncation-variant-b.ll |   10 +-
 ...nput-masking-after-truncation-variant-c.ll |    4 +-
 ...nput-masking-after-truncation-variant-d.ll |   10 +-
 ...nput-masking-after-truncation-variant-e.ll |    2 +-
 ...nput-masking-after-truncation-variant-f.ll |    2 +-
 ...dant-left-shift-input-masking-variant-a.ll |   10 +-
 ...dant-left-shift-input-masking-variant-b.ll |   10 +-
 ...dant-left-shift-input-masking-variant-c.ll |    6 +-
 ...dant-left-shift-input-masking-variant-d.ll |   12 +-
 ...dant-left-shift-input-masking-variant-e.ll |    2 +-
 ...dant-left-shift-input-masking-variant-f.ll |    2 +-
 .../redundant-right-shift-input-masking.ll    |    2 +-
 .../Transforms/InstCombine/rem-mul-shl.ll     |    8 +-
 llvm/test/Transforms/InstCombine/rem.ll       |   16 +-
 .../reuse-constant-from-select-in-icmp.ll     |   30 +-
 llvm/test/Transforms/InstCombine/rotate.ll    |    8 +-
 .../InstCombine/sadd-with-overflow.ll         |    6 +-
 llvm/test/Transforms/InstCombine/sadd_sat.ll  |   14 +-
 .../InstCombine/saturating-add-sub.ll         |   86 +-
 .../Transforms/InstCombine/scalarization.ll   |    2 +-
 .../InstCombine/sdiv-canonicalize.ll          |    4 +-
 .../sdiv-exact-by-negative-power-of-two.ll    |    6 +-
 .../InstCombine/sdiv-exact-by-power-of-two.ll |    6 +-
 .../Transforms/InstCombine/select-bitext.ll   |   18 +-
 .../Transforms/InstCombine/select-divrem.ll   |    2 +-
 .../InstCombine/select-extractelement.ll      |    2 +-
 .../InstCombine/select-icmp-and-zero-shl.ll   |    4 +-
 .../Transforms/InstCombine/select-icmp-and.ll |   40 +-
 .../InstCombine/select-of-bittest.ll          |   24 +-
 .../InstCombine/select-safe-transforms.ll     |   16 +-
 .../InstCombine/select-with-bitwise-ops.ll    |   58 +-
 llvm/test/Transforms/InstCombine/select.ll    |   32 +-
 .../Transforms/InstCombine/select_meta.ll     |    2 +-
 .../set-lowbits-mask-canonicalize.ll          |   16 +-
 llvm/test/Transforms/InstCombine/set.ll       |   18 +-
 .../InstCombine/sext-of-trunc-nsw.ll          |    2 +-
 llvm/test/Transforms/InstCombine/sext.ll      |    8 +-
 llvm/test/Transforms/InstCombine/shift-add.ll |    4 +-
 ...ciation-in-bittest-with-truncation-lshr.ll |   16 +-
 ...ociation-in-bittest-with-truncation-shl.ll |    6 +-
 .../shift-amount-reassociation-in-bittest.ll  |    4 +-
 ...ount-reassociation-with-truncation-ashr.ll |    2 +-
 ...ount-reassociation-with-truncation-lshr.ll |    2 +-
 ...mount-reassociation-with-truncation-shl.ll |    2 +-
 .../InstCombine/shift-amount-reassociation.ll |    4 +-
 .../Transforms/InstCombine/shift-logic.ll     |   32 +-
 .../Transforms/InstCombine/shift-shift.ll     |   26 +-
 llvm/test/Transforms/InstCombine/shift-sra.ll |    8 +-
 llvm/test/Transforms/InstCombine/shift.ll     |   78 +-
 .../InstCombine/shl-and-negC-icmpeq-zero.ll   |    8 +-
 .../shl-and-signbit-icmpeq-zero.ll            |    8 +-
 llvm/test/Transforms/InstCombine/shl-bo.ll    |   72 +-
 .../test/Transforms/InstCombine/shl-demand.ll |   10 +-
 llvm/test/Transforms/InstCombine/shl-sub.ll   |   18 +-
 .../InstCombine/shl-unsigned-cmp-const.ll     |   12 +-
 .../shuffle_select-inseltpoison.ll            |   10 +-
 .../Transforms/InstCombine/shuffle_select.ll  |   10 +-
 .../signbit-lshr-and-icmpeq-zero.ll           |    2 +-
 .../signbit-shl-and-icmpeq-zero.ll            |    2 +-
 .../InstCombine/signed-truncation-check.ll    |   18 +-
 llvm/test/Transforms/InstCombine/signext.ll   |   14 +-
 .../signmask-of-sext-vs-of-shl-of-zext.ll     |    6 +-
 .../InstCombine/simplify-demanded-fpclass.ll  |    6 +-
 llvm/test/Transforms/InstCombine/sitofp.ll    |    2 +-
 llvm/test/Transforms/InstCombine/smin-icmp.ll |  106 +-
 llvm/test/Transforms/InstCombine/sqrt.ll      |    2 +-
 .../InstCombine/ssub-with-overflow.ll         |    6 +-
 .../InstCombine/sub-ashr-or-to-icmp-select.ll |   12 +-
 .../InstCombine/sub-lshr-or-to-icmp-select.ll |    2 +-
 llvm/test/Transforms/InstCombine/sub-not.ll   |    8 +-
 .../InstCombine/sub-of-negatible.ll           |    4 +-
 llvm/test/Transforms/InstCombine/sub-xor.ll   |   12 +-
 llvm/test/Transforms/InstCombine/sub.ll       |   62 +-
 .../Transforms/InstCombine/trunc-binop-ext.ll |    2 +-
 .../Transforms/InstCombine/trunc-demand.ll    |    2 +-
 .../InstCombine/trunc-inseltpoison.ll         |   26 +-
 .../InstCombine/trunc-shift-trunc.ll          |    4 +-
 llvm/test/Transforms/InstCombine/trunc.ll     |   26 +-
 .../InstCombine/truncating-saturate.ll        |   14 +-
 .../InstCombine/uadd-with-overflow.ll         |   16 +-
 llvm/test/Transforms/InstCombine/uaddo.ll     |    2 +-
 .../udiv_select_to_select_shift.ll            |    2 +-
 .../Transforms/InstCombine/umin_cttz_ctlz.ll  |    4 +-
 ...gned-add-lack-of-overflow-check-via-xor.ll |    2 +-
 .../unsigned-add-lack-of-overflow-check.ll    |    2 +-
 .../unsigned-add-overflow-check-via-xor.ll    |    2 +-
 .../unsigned-add-overflow-check.ll            |    2 +-
 .../InstCombine/unsigned_saturated_sub.ll     |    2 +-
 ...signext-of-variable-high-bit-extraction.ll |    2 +-
 .../vec_demanded_elts-inseltpoison.ll         |    2 +-
 .../InstCombine/vec_demanded_elts.ll          |    2 +-
 .../vec_phi_extract-inseltpoison.ll           |    2 +-
 .../Transforms/InstCombine/vec_phi_extract.ll |    2 +-
 llvm/test/Transforms/InstCombine/vec_sext.ll  |    2 +-
 .../InstCombine/vec_shuffle-inseltpoison.ll   |   28 +-
 .../Transforms/InstCombine/vec_shuffle.ll     |   28 +-
 .../InstCombine/vector-casts-inseltpoison.ll  |   20 +-
 .../Transforms/InstCombine/vector-casts.ll    |   20 +-
 .../test/Transforms/InstCombine/vector-mul.ll |   40 +-
 .../vector-reduce-min-max-known.ll            |    2 +-
 .../Transforms/InstCombine/vector-trunc.ll    |   10 +-
 .../Transforms/InstCombine/vector-udiv.ll     |    8 +-
 .../Transforms/InstCombine/vector-urem.ll     |   10 +-
 .../test/Transforms/InstCombine/vector-xor.ll |   20 +-
 .../Transforms/InstCombine/with_overflow.ll   |   30 +-
 llvm/test/Transforms/InstCombine/xor-ashr.ll  |    8 +-
 llvm/test/Transforms/InstCombine/xor.ll       |   54 +-
 .../InstCombine/zext-bool-add-sub.ll          |    2 +-
 .../zext-ctlz-trunc-to-ctlz-add.ll            |    2 +-
 llvm/test/Transforms/InstCombine/zext.ll      |   16 +-
 .../2011-09-05-InsertExtractValue.ll          |    6 +-
 llvm/test/Transforms/InstSimplify/AndOrXor.ll |   20 +-
 .../InstSimplify/ConstProp/bitcast.ll         |    2 +-
 .../Transforms/InstSimplify/ConstProp/cast.ll |    2 +-
 .../ConstProp/saturating-add-sub.ll           |   18 +-
 .../InstSimplify/ConstProp/vectorgep-crash.ll |    2 +-
 llvm/test/Transforms/InstSimplify/add_vp.ll   |    2 +-
 .../InstSimplify/bitcast-vector-fold.ll       |   39 +-
 .../Transforms/InstSimplify/canonicalize.ll   |    2 +-
 llvm/test/Transforms/InstSimplify/compare.ll  |    2 +-
 ...constantfold-add-nuw-allones-to-allones.ll |    2 +-
 .../constantfold-shl-nuw-C-to-C.ll            |    2 +-
 .../Transforms/InstSimplify/ctpop-pow2.ll     |   10 +-
 llvm/test/Transforms/InstSimplify/div.ll      |    6 +-
 llvm/test/Transforms/InstSimplify/exp10.ll    |   28 +-
 .../InstSimplify/fast-math-strictfp.ll        |    4 +-
 .../test/Transforms/InstSimplify/fast-math.ll |    4 +-
 llvm/test/Transforms/InstSimplify/fdiv.ll     |    2 +-
 .../floating-point-arithmetic-strictfp.ll     |    6 +-
 .../InstSimplify/floating-point-arithmetic.ll |   22 +-
 .../Transforms/InstSimplify/fminmax-folds.ll  |   14 +-
 llvm/test/Transforms/InstSimplify/fp-nan.ll   |    4 +-
 .../Transforms/InstSimplify/fptoi-range.ll    |   12 +-
 llvm/test/Transforms/InstSimplify/frexp.ll    |   24 +-
 llvm/test/Transforms/InstSimplify/gep.ll      |   14 +-
 .../Transforms/InstSimplify/icmp-constant.ll  |    6 +-
 .../Transforms/InstSimplify/insertelement.ll  |    2 +-
 .../InstSimplify/known-never-infinity.ll      |    8 +-
 .../Transforms/InstSimplify/known-non-zero.ll |    8 +-
 .../InstSimplify/maxmin_intrinsics.ll         |   28 +-
 llvm/test/Transforms/InstSimplify/negate.ll   |    2 +-
 llvm/test/Transforms/InstSimplify/or.ll       |    4 +-
 llvm/test/Transforms/InstSimplify/pr28725.ll  |    2 +-
 llvm/test/Transforms/InstSimplify/ptrmask.ll  |   22 +-
 llvm/test/Transforms/InstSimplify/rem.ll      |    2 +-
 llvm/test/Transforms/InstSimplify/returned.ll |    2 +-
 .../InstSimplify/saturating-add-sub.ll        |   22 +-
 llvm/test/Transforms/InstSimplify/sdiv.ll     |    6 +-
 .../InstSimplify/select-inseltpoison.ll       |    8 +-
 llvm/test/Transforms/InstSimplify/select.ll   |    8 +-
 .../Transforms/InstSimplify/select_or_and.ll  |    2 +-
 .../InstSimplify/shift-knownbits.ll           |    6 +-
 llvm/test/Transforms/InstSimplify/shift.ll    |    6 +-
 llvm/test/Transforms/InstSimplify/shr-nop.ll  |    2 +-
 .../shufflevector-inseltpoison.ll             |    4 +-
 .../Transforms/InstSimplify/shufflevector.ll  |    6 +-
 .../Transforms/InstSimplify/strictfp-fadd.ll  |   14 +-
 llvm/test/Transforms/InstSimplify/uscmp.ll    |    4 +-
 .../Transforms/InstSimplify/vector_gep.ll     |    2 +-
 ...interleave-load-extract-shuffle-changes.ll |    4 +-
 .../LoopLoadElim/type-mismatch-opaque-ptr.ll  |    2 +-
 .../Transforms/LoopLoadElim/type-mismatch.ll  |    2 +-
 .../LoopUnroll/ARM/mve-upperbound.ll          |    2 +-
 ...unrolling-legalize-vectors-inseltpoison.ll |   16 +-
 .../PowerPC/p8-unrolling-legalize-vectors.ll  |   16 +-
 .../LoopVectorize/AArch64/call-costs.ll       |    4 +-
 .../AArch64/conditional-branches-cost.ll      |   34 +-
 .../AArch64/deterministic-type-shrinkage.ll   |  108 +-
 .../epilog-vectorization-widen-inductions.ll  |   36 +-
 .../first-order-recurrence-fold-tail.ll       |    4 +-
 .../AArch64/force-target-instruction-cost.ll  |   24 +-
 .../AArch64/induction-costs-sve.ll            |    6 +-
 .../LoopVectorize/AArch64/induction-costs.ll  |  128 +-
 .../AArch64/loop-vectorization-factors.ll     |  200 +--
 ...ng-compatible-sve-no-maximize-bandwidth.ll |    4 +-
 .../LoopVectorize/AArch64/sve-epilog-vect.ll  |    6 +-
 .../AArch64/type-shrinkage-insertelt.ll       |    2 +-
 .../AArch64/vector-reverse-mask4.ll           |    4 +-
 .../ARM/mve-gather-scatter-tailpred.ll        |   40 +-
 .../Transforms/LoopVectorize/ARM/mve-qabs.ll  |   12 +-
 .../ARM/mve-reduction-predselect.ll           |   12 +-
 .../LoopVectorize/ARM/mve-reduction-types.ll  |   12 +-
 .../LoopVectorize/ARM/pointer_iv.ll           |    2 +-
 .../ARM/tail-fold-multiple-icmps.ll           |    4 +-
 .../ARM/tail-folding-not-allowed.ll           |   14 +-
 .../LoopVectorize/PowerPC/exit-branch-cost.ll |   32 +-
 .../PowerPC/optimal-epilog-vectorization.ll   |   36 +-
 .../LoopVectorize/RISCV/dead-ops-cost.ll      |    4 +-
 .../Transforms/LoopVectorize/RISCV/divrem.ll  |   32 +-
 .../RISCV/interleaved-accesses.ll             |   92 +-
 .../LoopVectorize/RISCV/low-trip-count.ll     |    6 +-
 .../Transforms/LoopVectorize/RISCV/pr88802.ll |   10 +-
 .../LoopVectorize/RISCV/uniform-load-store.ll |   24 +-
 ...vectorize-force-tail-with-evl-reduction.ll |    8 +-
 ...e-force-tail-with-evl-safe-dep-distance.ll |    2 +-
 .../SystemZ/force-target-instruction-cost.ll  |    2 +-
 .../LoopVectorize/SystemZ/pr47665.ll          |    2 +-
 .../predicated-first-order-recurrence.ll      |    6 +-
 .../LoopVectorize/X86/conversion-cost.ll      |   20 +-
 .../X86/cost-constant-known-via-scev.ll       |   16 +-
 .../LoopVectorize/X86/cost-model.ll           |   16 +-
 .../X86/divs-with-tail-folding.ll             |    2 +-
 .../X86/epilog-vectorization-inductions.ll    |  132 +-
 .../X86/fixed-order-recurrence.ll             |    4 +-
 .../LoopVectorize/X86/float-induction-x86.ll  |   24 +-
 .../LoopVectorize/X86/gather_scatter.ll       |   52 +-
 .../LoopVectorize/X86/gep-use-outside-loop.ll |    6 +-
 .../illegal-parallel-loop-uniform-write.ll    |   12 +-
 .../X86/imprecise-through-phis.ll             |   12 +-
 .../LoopVectorize/X86/induction-costs.ll      |  222 ++-
 .../LoopVectorize/X86/interleave-cost.ll      |    2 +-
 ...rleaved-accesses-sink-store-across-load.ll |   30 +-
 .../LoopVectorize/X86/load-deref-pred.ll      |    8 +-
 .../LoopVectorize/X86/masked-store-cost.ll    |   12 +-
 .../LoopVectorize/X86/masked_load_store.ll    |  194 +--
 .../Transforms/LoopVectorize/X86/optsize.ll   |   20 +-
 .../Transforms/LoopVectorize/X86/pr36524.ll   |    8 +-
 .../Transforms/LoopVectorize/X86/pr54634.ll   |    8 +-
 .../X86/pr55096-scalarize-add.ll              |    4 +-
 .../Transforms/LoopVectorize/X86/pr72969.ll   |    4 +-
 .../Transforms/LoopVectorize/X86/pr81872.ll   |   21 +-
 .../LoopVectorize/X86/predicate-switch.ll     |  200 +--
 .../LoopVectorize/X86/reduction-fastmath.ll   |   16 +-
 .../LoopVectorize/X86/scatter_crash.ll        |  250 ++-
 .../LoopVectorize/X86/small-size.ll           |    6 +-
 .../LoopVectorize/X86/strided_load_cost.ll    |   16 +-
 .../LoopVectorize/X86/tail_loop_folding.ll    |    4 +-
 .../LoopVectorize/X86/uniform_mem_op.ll       |   28 +-
 .../X86/vect.omp.force.small-tc.ll            |    2 +-
 .../X86/vectorize-interleaved-accesses-gap.ll |    4 +-
 .../X86/vplan-native-inner-loop-only.ll       |    2 +-
 ...ned-value-used-as-scalar-and-first-lane.ll |   32 +-
 .../x86-interleaved-accesses-masked-group.ll  |   66 +-
 ...86-interleaved-store-accesses-with-gaps.ll |   22 +-
 .../LoopVectorize/X86/x86-predication.ll      |    8 +-
 .../LoopVectorize/blend-in-header.ll          |    4 +-
 .../Transforms/LoopVectorize/bsd_regex.ll     |   10 +-
 .../LoopVectorize/create-induction-resume.ll  |    2 +-
 .../LoopVectorize/dbg-outer-loop-vect.ll      |   38 +-
 .../LoopVectorize/dead_instructions.ll        |   14 +-
 .../dont-fold-tail-for-const-TC.ll            |    6 +-
 .../dont-fold-tail-for-divisible-TC.ll        |    8 +-
 .../epilog-vectorization-any-of-reductions.ll |    4 +-
 .../epilog-vectorization-reductions.ll        |    6 +-
 ...log-vectorization-trunc-induction-steps.ll |    4 +-
 .../LoopVectorize/extract-last-veclane.ll     |    4 +-
 .../first-order-recurrence-complex.ll         |   38 +-
 ...t-order-recurrence-multiply-recurrences.ll |   14 +-
 .../LoopVectorize/first-order-recurrence.ll   |   82 +-
 .../LoopVectorize/float-induction.ll          |  136 +-
 .../LoopVectorize/if-conversion-nest.ll       |   22 +-
 .../LoopVectorize/if-pred-non-void.ll         |  132 +-
 .../LoopVectorize/if-pred-stores.ll           |    4 +-
 .../Transforms/LoopVectorize/if-reduction.ll  |   18 +-
 .../Transforms/LoopVectorize/induction.ll     |  312 ++--
 .../instruction-only-used-outside-of-loop.ll  |   12 +-
 .../LoopVectorize/interleaved-accesses.ll     |   38 +-
 .../invariant-store-vectorization.ll          |    6 +-
 .../Transforms/LoopVectorize/loop-form.ll     |   20 +-
 .../Transforms/LoopVectorize/loop-scalars.ll  |    2 +-
 .../LoopVectorize/memdep-fold-tail.ll         |    6 +-
 .../LoopVectorize/multiple-address-spaces.ll  |    2 +-
 ...o-fold-tail-by-masking-iv-external-uses.ll |    2 +-
 .../optimal-epilog-vectorization.ll           |   20 +-
 .../outer-loop-vec-phi-predecessor-order.ll   |   10 +-
 .../test/Transforms/LoopVectorize/phi-cost.ll |   10 +-
 .../LoopVectorize/pointer-induction.ll        |    2 +-
 .../LoopVectorize/pr44488-predication.ll      |    4 +-
 llvm/test/Transforms/LoopVectorize/pr45259.ll |    6 +-
 .../pr45679-fold-tail-by-masking.ll           |   24 +-
 .../pr51614-fold-tail-by-masking.ll           |    4 +-
 .../pr55167-fold-tail-live-out.ll             |   14 +-
 llvm/test/Transforms/LoopVectorize/pr66616.ll |    2 +-
 .../LoopVectorize/predicate-switch.ll         |   38 +-
 .../LoopVectorize/preserve-or-disjoint.ll     |    4 +-
 .../LoopVectorize/reduction-inloop-cond.ll    |   16 +-
 .../LoopVectorize/reduction-inloop-min-max.ll |    4 +-
 .../LoopVectorize/reduction-inloop-pred.ll    |   94 +-
 .../LoopVectorize/reduction-inloop-uf4.ll     |   34 +-
 .../LoopVectorize/reduction-inloop.ll         |   26 +-
 .../LoopVectorize/reduction-predselect.ll     |   50 +-
 .../LoopVectorize/reduction-small-size.ll     |   12 +-
 .../Transforms/LoopVectorize/reduction.ll     |   18 +-
 .../LoopVectorize/reverse_induction.ll        |   16 +-
 .../runtime-check-small-clamped-bounds.ll     |    8 +-
 .../Transforms/LoopVectorize/runtime-check.ll |    2 +-
 .../LoopVectorize/runtime-checks-hoist.ll     |    2 +-
 .../LoopVectorize/scalarize-masked-call.ll    |    4 +-
 .../LoopVectorize/scev-predicate-reasoning.ll |    2 +-
 ...tion-start-value-may-be-undef-or-poison.ll |   54 +-
 .../LoopVectorize/select-reduction.ll         |    6 +-
 .../LoopVectorize/single-value-blend-phis.ll  |   20 +-
 .../tail-folding-alloca-in-loop.ll            |   10 +-
 .../LoopVectorize/tail-folding-switch.ll      |    6 +-
 .../trip-count-expansion-may-introduce-ub.ll  |   24 +-
 .../LoopVectorize/trunc-extended-icmps.ll     |    2 +-
 .../Transforms/LoopVectorize/trunc-shifts.ll  |   80 +-
 .../Transforms/LoopVectorize/uniform-blend.ll |    2 +-
 .../uniform_across_vf_induction1.ll           |   90 +-
 .../uniform_across_vf_induction1_and.ll       |   54 +-
 .../uniform_across_vf_induction1_div_urem.ll  |   26 +-
 .../uniform_across_vf_induction1_lshr.ll      |   88 +-
 .../uniform_across_vf_induction2.ll           |  352 ++---
 .../Transforms/LoopVectorize/vector-geps.ll   |    2 +-
 .../version-stride-with-integer-casts.ll      |    6 +-
 .../dot-product-int-row-major.ll              |   16 +-
 .../multiply-add-sub-double-row-major.ll      |    6 +-
 llvm/test/Transforms/MemCpyOpt/form-memset.ll |    2 +-
 llvm/test/Transforms/NewGVN/completeness.ll   |    4 +-
 ...ting-sinking-required-for-vectorization.ll |   12 +-
 .../AArch64/matrix-extract-insert.ll          |   16 +-
 .../PhaseOrdering/AArch64/quant_4x4.ll        |    8 +-
 .../PhaseOrdering/AArch64/slpordering.ll      |    8 +-
 .../PhaseOrdering/ARM/arm_mult_q15.ll         |    4 +-
 .../X86/hoist-load-of-baseptr.ll              |    8 +-
 .../PhaseOrdering/X86/pixel-splat.ll          |    8 +-
 .../X86/pr48844-br-to-switch-vectorization.ll |   24 +-
 .../Transforms/PhaseOrdering/X86/pr50555.ll   |   24 +-
 .../Transforms/PhaseOrdering/X86/pr88239.ll   |    2 +-
 .../PhaseOrdering/X86/shuffle-inseltpoison.ll |    6 +-
 .../Transforms/PhaseOrdering/X86/shuffle.ll   |    6 +-
 .../PhaseOrdering/X86/speculation-vs-tbaa.ll  |    4 +-
 .../PhaseOrdering/X86/vdiv-nounroll.ll        |    2 +-
 .../test/Transforms/PhaseOrdering/X86/vdiv.ll |    8 +-
 .../Transforms/PhaseOrdering/X86/vec-shift.ll |   24 +-
 .../X86/vector-reductions-logical.ll          |   14 +-
 .../PhaseOrdering/X86/vector-reductions.ll    |    4 +-
 .../Reassociate/fast-ReassociateVector.ll     |   44 +-
 llvm/test/Transforms/Reassociate/negation.ll  |    6 +-
 .../Transforms/Reassociate/xor_reassoc.ll     |   18 +-
 .../test/Transforms/SCCP/add-nuw-nsw-flags.ll |    8 +-
 llvm/test/Transforms/SCCP/intrinsics.ll       |    2 +-
 llvm/test/Transforms/SCCP/ip-ranges-casts.ll  |    2 +-
 llvm/test/Transforms/SCCP/overdefined-ext.ll  |    8 +-
 .../Transforms/SCCP/trunc-nuw-nsw-flags.ll    |    2 +-
 llvm/test/Transforms/SCCP/vector-bitcast.ll   |    4 +-
 .../AArch64/external-use-icmp.ll              |    4 +-
 .../AArch64/extractelements-to-shuffle.ll     |    4 +-
 .../SLPVectorizer/AArch64/gather-cost.ll      |    6 +-
 .../SLPVectorizer/AArch64/gather-root.ll      |   12 +-
 .../SLPVectorizer/AArch64/loadi8.ll           |    2 +-
 .../SLPVectorizer/AArch64/loadorder.ll        |    8 +-
 .../AArch64/memory-runtime-checks.ll          |    6 +-
 .../SLPVectorizer/AArch64/sdiv-pow2.ll        |    2 +-
 .../AArch64/transpose-inseltpoison.ll         |    6 +-
 .../SLPVectorizer/AArch64/transpose.ll        |    6 +-
 .../SLPVectorizer/AArch64/trunc-insertion.ll  |    2 +-
 .../SLPVectorizer/AArch64/vec15-base.ll       |    6 +-
 .../SLPVectorizer/AArch64/vec3-base.ll        |   12 +-
 .../AArch64/vec3-reorder-reshuffle.ll         |    8 +-
 .../AArch64/vectorizable-selects-min-max.ll   |  102 +-
 .../vectorizable-selects-uniform-cmps.ll      |   16 +-
 .../Transforms/SLPVectorizer/AArch64/widen.ll |    4 +-
 .../Transforms/SLPVectorizer/NVPTX/v2f16.ll   |    4 +-
 .../SLPVectorizer/RISCV/complex-loads.ll      |   56 +-
 .../SLPVectorizer/RISCV/floating-point.ll     |   12 +-
 .../RISCV/getpointerschaincost.ll             |    6 +-
 .../SLPVectorizer/RISCV/load-binop-store.ll   |   20 +-
 .../SLPVectorizer/RISCV/load-store.ll         |    8 +-
 .../RISCV/minbw-with-and-and-scalar-trunc.ll  |    2 +-
 .../SLPVectorizer/RISCV/phi-const.ll          |    2 +-
 .../reduction-extension-after-bitwidth.ll     |    2 +-
 .../RISCV/select-profitability.ll             |    4 +-
 .../RISCV/smin-signed-zextended.ll            |    4 +-
 .../RISCV/trunc-bv-multi-uses.ll              |    2 +-
 .../RISCV/trunc-to-large-than-bw.ll           |    2 +-
 .../unsigned-node-trunc-with-signed-users.ll  |   10 +-
 .../SLPVectorizer/RISCV/vec15-base.ll         |    8 +-
 .../SLPVectorizer/RISCV/vec3-base.ll          |   18 +-
 .../SystemZ/ext-not-resized-op-resized.ll     |    4 +-
 .../SLPVectorizer/SystemZ/pr34619.ll          |    2 +-
 .../Transforms/SLPVectorizer/X86/PR35628_2.ll |    4 +-
 .../X86/alternate-int-inseltpoison.ll         |    8 +-
 .../SLPVectorizer/X86/alternate-int.ll        |    8 +-
 .../SLPVectorizer/X86/arith-and-const-load.ll |   12 +-
 .../Transforms/SLPVectorizer/X86/arith-div.ll |   88 +-
 .../SLPVectorizer/X86/arith-fshl-rot.ll       |    4 +-
 .../SLPVectorizer/X86/arith-fshr-rot.ll       |    4 +-
 .../SLPVectorizer/X86/barriercall.ll          |    2 +-
 .../Transforms/SLPVectorizer/X86/c-ray.ll     |    2 +-
 .../X86/cast-operand-extracted.ll             |    4 +-
 .../Transforms/SLPVectorizer/X86/cmp_sel.ll   |    2 +-
 .../X86/combined-stores-chains.ll             |    2 +-
 .../SLPVectorizer/X86/compare-reduce.ll       |    6 +-
 .../SLPVectorizer/X86/crash_bullet3.ll        |    6 +-
 .../SLPVectorizer/X86/crash_cmpop.ll          |   16 +-
 .../X86/crash_scheduling-inseltpoison.ll      |    2 +-
 .../SLPVectorizer/X86/crash_scheduling.ll     |    2 +-
 .../SLPVectorizer/X86/crash_sim4b1.ll         |    2 +-
 .../SLPVectorizer/X86/crash_smallpt.ll        |    2 +-
 llvm/test/Transforms/SLPVectorizer/X86/cse.ll |    8 +-
 .../SLPVectorizer/X86/debug-counter.ll        |    8 +-
 .../SLPVectorizer/X86/different-vec-widths.ll |   10 +-
 .../X86/external-used-across-reductions.ll    |    4 +-
 .../SLPVectorizer/X86/external_user.ll        |    6 +-
 .../SLPVectorizer/X86/extractcost.ll          |    2 +-
 .../X86/gather-node-same-as-vect-but-order.ll |    2 +-
 .../SLPVectorizer/X86/gather-with-cmp-user.ll |    2 +-
 llvm/test/Transforms/SLPVectorizer/X86/gep.ll |    2 +-
 .../SLPVectorizer/X86/geps-non-pow-2.ll       |    6 +-
 .../SLPVectorizer/X86/horizontal-list.ll      |    2 +-
 .../SLPVectorizer/X86/horizontal.ll           |    2 +-
 .../SLPVectorizer/X86/insert-after-bundle.ll  |   40 +-
 .../SLPVectorizer/X86/long_chains.ll          |   12 +-
 .../X86/matched-shuffled-entries.ll           |    6 +-
 .../X86/matching-gather-nodes-phi-users.ll    |    4 +-
 .../X86/minbitwidth-icmp-to-trunc.ll          |   14 +-
 .../SLPVectorizer/X86/minimum-sizes.ll        |    8 +-
 .../Transforms/SLPVectorizer/X86/mul64.ll     |    4 +-
 .../X86/non-power-of-2-order-detection.ll     |    2 +-
 llvm/test/Transforms/SLPVectorizer/X86/phi.ll |    8 +-
 .../test/Transforms/SLPVectorizer/X86/phi3.ll |    2 +-
 .../Transforms/SLPVectorizer/X86/powof2div.ll |    2 +-
 .../Transforms/SLPVectorizer/X86/powof2mul.ll |   28 +-
 .../Transforms/SLPVectorizer/X86/pr23510.ll   |   10 +-
 .../Transforms/SLPVectorizer/X86/pr35497.ll   |   20 +-
 .../Transforms/SLPVectorizer/X86/pr40522.ll   |    4 +-
 .../SLPVectorizer/X86/pr44067-inseltpoison.ll |    2 +-
 .../Transforms/SLPVectorizer/X86/pr44067.ll   |    2 +-
 .../Transforms/SLPVectorizer/X86/pr46983.ll   |   16 +-
 .../SLPVectorizer/X86/pr48879-sroa.ll         |    4 +-
 .../Transforms/SLPVectorizer/X86/pr49933.ll   |    2 +-
 .../SLPVectorizer/X86/propagate_ir_flags.ll   |   38 +-
 .../X86/reduction-bool-logic-op-inside.ll     |    2 +-
 .../SLPVectorizer/X86/reduction-logical.ll    |   10 +-
 .../Transforms/SLPVectorizer/X86/reduction.ll |    2 +-
 .../SLPVectorizer/X86/reduction2.ll           |    8 +-
 .../Transforms/SLPVectorizer/X86/resched.ll   |    2 +-
 .../X86/reuse-extracts-in-wider-vect.ll       |    2 +-
 .../X86/reused-scalars-in-buildvector.ll      |    2 +-
 .../SLPVectorizer/X86/schedule-bundle.ll      |    8 +-
 .../SLPVectorizer/X86/shrink_after_reorder.ll |    2 +-
 .../SLPVectorizer/X86/simple-loop.ll          |    6 +-
 .../X86/sitofp-minbitwidth-node.ll            |    2 +-
 .../X86/stackrestore-dependence.ll            |    2 +-
 .../SLPVectorizer/X86/stacksave-dependence.ll |   14 +-
 .../SLPVectorizer/X86/stores_vectorize.ll     |    6 +-
 .../subvector-minbitwidth-unsigned-value.ll   |    2 +-
 .../Transforms/SLPVectorizer/X86/supernode.ll |    2 +-
 .../SLPVectorizer/X86/unreachable.ll          |    2 +-
 .../Transforms/SLPVectorizer/X86/vec3-base.ll |    4 +-
 .../X86/vec_list_bias-inseltpoison.ll         |    2 +-
 .../SLPVectorizer/X86/vec_list_bias.ll        |    2 +-
 .../vec_list_bias_external_insert_shuffled.ll |    2 +-
 .../X86/vect_copyable_in_binops.ll            |    8 +-
 .../SLPVectorizer/X86/vectorize-pair-path.ll  |    2 +-
 .../SLPVectorizer/alternate-non-profitable.ll |    2 +-
 .../call-arg-reduced-by-minbitwidth.ll        |    4 +-
 .../extended-vectorized-gathered-inst.ll      |    2 +-
 ...nsert-element-build-vector-inseltpoison.ll |    2 +-
 .../insert-element-build-vector.ll            |    2 +-
 .../SLPVectorizer/jumbled_store_crash.ll      |    2 +-
 .../SLPVectorizer/operand-is-reduced-val.ll   |    4 +-
 .../SLPVectorizer/phi-node-bitwidt-op-not.ll  |    6 +-
 .../reduction-whole-regs-loads.ll             |    4 +-
 .../SLPVectorizer/reduction_loads.ll          |    2 +-
 ...ed-buildvector-matching-vectorized-node.ll |    4 +-
 .../SLPVectorizer/revec-fix-109835.ll         |    8 +-
 .../SLPVectorizer/shrink_after_reorder2.ll    |    4 +-
 llvm/test/Transforms/SROA/tbaa-struct3.ll     |    2 +-
 .../Scalarizer/phi-unreachable-pred.ll        |    2 +-
 .../nontrivial-unswitch-select.ll             |    2 +-
 .../X86/hoist-loads-stores-with-cf.ll         |   16 +-
 .../SimplifyCFG/preserve-store-alignment.ll   |   30 +-
 .../StraightLineStrengthReduce/slsr-add.ll    |    6 +-
 .../VectorCombine/AArch64/select-shuffle.ll   |   16 +-
 .../VectorCombine/AArch64/shrink-types.ll     |   10 +-
 .../AArch64/shuffletoidentity.ll              |    4 +-
 .../AArch64/vecreduce-shuffle.ll              |   42 +-
 .../RISCV/vpintrin-scalarization.ll           |   10 +-
 .../X86/insert-binop-with-constant.ll         |   18 +-
 .../VectorCombine/X86/shuffle-inseltpoison.ll |    8 +-
 .../VectorCombine/X86/shuffle-of-binops.ll    |    2 +-
 .../Transforms/VectorCombine/X86/shuffle.ll   |    8 +-
 830 files changed, 14676 insertions(+), 14003 deletions(-)

diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 6adb8d6a742255..93c66ee6d14540 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -89,6 +89,11 @@
 
 using namespace llvm;
 
+namespace llvm {
+extern cl::opt<bool> UseConstantIntForFixedLengthSplat;
+extern cl::opt<bool> UseConstantFPForFixedLengthSplat;
+} // namespace llvm
+
 // Make virtual table appear in this compilation unit.
 AssemblyAnnotationWriter::~AssemblyAnnotationWriter() = default;
 
@@ -1687,6 +1692,46 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     return;
   }
 
+  // When in the mode where Constant{Int,FP} do not support vector types the
+  // "splat(Ty val)" syntax is interpreted as a ConstantDataVector. Maintaining
+  // this association when outputiing the IR will significantly reduce the
+  // output changes when in the mode where Constant{Int,FP} do support vector
+  // types. In turn this should make it easier to spot difference in output
+  // when switching between the modes. Once the transition is complete this
+  // code will be removed.
+  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(CV)) {
+    if (auto *SplatVal = CDV->getSplatValue()) {
+      Type* EltTy = SplatVal->getType();
+
+      if (EltTy->isIntegerTy() && !UseConstantIntForFixedLengthSplat) {
+        if (const ConstantInt *CI = dyn_cast<ConstantInt>(SplatVal)) {
+          Out << "splat (";
+          WriterCtx.TypePrinter->print(EltTy, Out);
+          Out << " ";
+
+          if (EltTy->isIntegerTy(1))
+            Out << (CI->getZExtValue() ? "true" : "false");
+          else
+            Out << CI->getValue();
+
+          Out << ")";
+          return;
+        }
+      }
+
+      if (EltTy->isFloatingPointTy() && !UseConstantFPForFixedLengthSplat) {
+        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(SplatVal)) {
+          Out << "splat (";
+          WriterCtx.TypePrinter->print(EltTy, Out);
+          Out << " ";
+          WriteAPFloatInternal(Out, CFP->getValueAPF());
+          Out << ")";
+          return;
+        }
+      }
+    }
+  }
+
   if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
     auto *CVVTy = cast<FixedVectorType>(CV->getType());
     Type *ETy = CVVTy->getElementType();
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index fe3a086c5772de..d716b5c900d8e4 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -36,12 +36,14 @@ using namespace llvm;
 using namespace PatternMatch;
 
 // As set of temporary options to help migrate how splats are represented.
-static cl::opt<bool> UseConstantIntForFixedLengthSplat(
+namespace llvm {
+cl::opt<bool> UseConstantIntForFixedLengthSplat(
     "use-constant-int-for-fixed-length-splat", cl::init(false), cl::Hidden,
     cl::desc("Use ConstantInt's native fixed-length vector splat support."));
-static cl::opt<bool> UseConstantFPForFixedLengthSplat(
+cl::opt<bool> UseConstantFPForFixedLengthSplat(
     "use-constant-fp-for-fixed-length-splat", cl::init(false), cl::Hidden,
     cl::desc("Use ConstantFP's native fixed-length vector splat support."));
+}
 static cl::opt<bool> UseConstantIntForScalableSplat(
     "use-constant-int-for-scalable-splat", cl::init(false), cl::Hidden,
     cl::desc("Use ConstantInt's native scalable vector splat support."));
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
index aaffd97b92b2de..b329a5607acb97 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -70,15 +70,15 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; CHECK-LABEL: 'fneg_idiom'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F16 = fsub half -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
index 52f6f73525a3b9..303bcfa289577c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
@@ -2091,11 +2091,11 @@ define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6
 define void @extmul_const(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
 ; CHECK-LABEL: 'extmul_const'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll
index 2ceaf0c6f536af..ada0be66c27b5a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@@ -192,21 +192,21 @@ define i32 @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = sdiv i128 undef, 7
@@ -238,21 +238,21 @@ define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, 7
@@ -376,21 +376,21 @@ define i32 @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = sdiv i128 undef, 16
@@ -422,21 +422,21 @@ define i32 @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, 16
@@ -560,21 +560,21 @@ define i32 @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = sdiv i128 undef, -16
@@ -606,21 +606,21 @@ define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I128 = udiv i128 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
index dfed023f0119d4..dd93aed53c0f2a 100644
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'sdiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
   %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'sdiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'sdiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
@@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 
 define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'udiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
   %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 
 define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'udiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 
 define <4 x i32> @udiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'udiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshl.ll b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
index eab4efe0613b5d..632f26dfa53829 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@@ -96,7 +96,7 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)
 
 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
 ;
 entry:
@@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
 ;
 entry:
@@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
 ;
 entry:
@@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/fshr.ll b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
index 1876f0410c2337..a0a579ae96a9ba 100644
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@@ -96,7 +96,7 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)
 
 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
 ;
 entry:
@@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
 
 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
 ;
 entry:
@@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
 
 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
 ;
 entry:
@@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
 
 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll
index 7519bf1f7c5c6f..2f1e8c8bf8dfa4 100644
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@@ -178,21 +178,21 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -221,21 +221,21 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -350,21 +350,21 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -393,21 +393,21 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -522,21 +522,21 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -565,21 +565,21 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/div.ll b/llvm/test/Analysis/CostModel/AMDGPU/div.ll
index 406b67b9d45940..459c41a0b3eb5b 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/div.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll
@@ -337,59 +337,59 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; FAST-LABEL: 'sdiv_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 7
@@ -418,59 +418,59 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; FAST-LABEL: 'udiv_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 7
@@ -661,59 +661,59 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; FAST-LABEL: 'sdiv_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -742,59 +742,59 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; FAST-LABEL: 'udiv_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
@@ -985,59 +985,59 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; FAST-LABEL: 'sdiv_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, -16
@@ -1066,59 +1066,59 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; FAST-LABEL: 'udiv_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index 911b4319eaa4e7..ab9d7f9dc859de 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -314,146 +314,146 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 define amdgpu_kernel void @rcp_ieee() #0 {
 ; CIFASTF64-LABEL: 'rcp_ieee'
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ieee'
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ieee'
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ieee'
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ieee'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ieee'
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ieee'
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ieee'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half 1.0, undef
@@ -477,146 +477,146 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CIFASTF64-LABEL: 'rcp_ftzdaz'
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ftzdaz'
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ftzdaz'
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ftzdaz'
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ftzdaz'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ftzdaz'
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ftzdaz'
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ftzdaz'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half 1.0, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
index 9af5dd352d5883..734e76bd4053f5 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@@ -139,24 +139,24 @@ define void @fneg_f64() {
 define i32 @fneg_idiom(i32 %arg) {
 ; CHECK-LABEL: 'fneg_idiom'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'fneg_idiom'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index e6193791ff53a0..ca5d2fdd938b56 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -215,78 +215,78 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SLOW16-LABEL: 'mul_uniformconstpow2'
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; FAST16-LABEL: 'mul_uniformconstpow2'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; FAST16-SIZE-LABEL: 'mul_uniformconstpow2'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, 16
@@ -415,78 +415,78 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SLOW16-LABEL: 'mul_uniformconstnegpow2'
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; FAST16-LABEL: 'mul_uniformconstnegpow2'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
index 87e35c7627de05..7e37566db44172 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
@@ -337,59 +337,59 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; FAST-LABEL: 'srem_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -418,59 +418,59 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; FAST-LABEL: 'urem_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -661,59 +661,59 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; FAST-LABEL: 'srem_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -742,59 +742,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; FAST-LABEL: 'urem_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -985,59 +985,59 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; FAST-LABEL: 'srem_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -1066,59 +1066,59 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; FAST-LABEL: 'urem_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/ARM/divrem.ll b/llvm/test/Analysis/CostModel/ARM/divrem.ll
index 36c25850323274..9f0c29c8bb0c65 100644
--- a/llvm/test/Analysis/CostModel/ARM/divrem.ll
+++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll
@@ -1031,98 +1031,98 @@ define void @vf64() {
 
 define void @vi8_2() {
 ; CHECK-NEON-LABEL: 'vi8_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@@ -1146,98 +1146,98 @@ define void @vi8_2() {
 
 define void @vi16_2() {
 ; CHECK-NEON-LABEL: 'vi16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@@ -1261,98 +1261,98 @@ define void @vi16_2() {
 
 define void @vi32_2() {
 ; CHECK-NEON-LABEL: 'vi32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@@ -1376,98 +1376,98 @@ define void @vi32_2() {
 
 define void @vi64_2() {
 ; CHECK-NEON-LABEL: 'vi64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@@ -1491,48 +1491,48 @@ define void @vi64_2() {
 
 define void @vf16_2() {
 ; CHECK-NEON-LABEL: 'vf16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x half> undef, <half 2., half 2.>
@@ -1546,48 +1546,48 @@ define void @vf16_2() {
 
 define void @vf32_2() {
 ; CHECK-NEON-LABEL: 'vf32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x float> undef, <float 2., float 2.>
@@ -1601,48 +1601,48 @@ define void @vf32_2() {
 
 define void @vf64_2() {
 ; CHECK-NEON-LABEL: 'vf64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %1 = fdiv <2 x double> undef, <double 2., double 2.>
diff --git a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll
index c976f483fdfec2..5afc199e52d43a 100644
--- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll
@@ -1173,9 +1173,9 @@ define void @add_of_constant() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> <i64 1, i64 1>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
@@ -1191,9 +1191,9 @@ define void @add_of_constant() {
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> <i64 1, i64 1>, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
index 2448056076d8b4..6fc98da0eea97e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
@@ -514,9 +514,9 @@ define void @store_of_constant(ptr %p) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
@@ -532,9 +532,9 @@ define void @store_of_constant(ptr %p) {
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %p, align 16
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %p, align 32
-; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, ptr %p, align 16
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
+; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 1, i32 1, i32 2, i32 1>, ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 2, i32 1, i32 1, i32 1>, ptr %p, align 16
 ; CODESIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: store <4 x i32> <i32 0, i32 1, i32 2, i32 3>, ptr %p, align 16
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll
index 67b99f573aad21..344a34fa9a6309 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-phi-const.ll
@@ -24,7 +24,7 @@ define <4 x i8> @phi_v4i8_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %a ], [ <i8 2, i8 2, i8 2, i8 2>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ splat (i8 2), %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x
 ;
   br i1 %c, label %a, label %b
@@ -79,7 +79,7 @@ define <4 x i8> @phi_v4i8_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, %a ], [ <i8 8, i8 32, i8 -59, i8 12>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i8> [ splat (i8 1), %a ], [ <i8 8, i8 32, i8 -59, i8 12>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %x
 ;
   br i1 %c, label %a, label %b
@@ -97,7 +97,7 @@ define <4 x i16> @phi_v4i16_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ <i16 1, i16 1, i16 1, i16 1>, %a ], [ <i16 2, i16 2, i16 2, i16 2>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ splat (i16 2), %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x
 ;
   br i1 %c, label %a, label %b
@@ -152,7 +152,7 @@ define <4 x i16> @phi_v4i16_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ <i16 1, i16 1, i16 1, i16 1>, %a ], [ <i16 7432, i16 -4832, i16 -11835, i16 12>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i16> [ splat (i16 1), %a ], [ <i16 7432, i16 -4832, i16 -11835, i16 12>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %x
 ;
   br i1 %c, label %a, label %b
@@ -170,7 +170,7 @@ define <4 x i32> @phi_v4i32_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %a ], [ <i32 2, i32 2, i32 2, i32 2>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ splat (i32 2), %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
   br i1 %c, label %a, label %b
@@ -225,7 +225,7 @@ define <4 x i32> @phi_v4i32_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, %a ], [ <i32 7432, i32 -4832, i32 381381, i32 12>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i32> [ splat (i32 1), %a ], [ <i32 7432, i32 -4832, i32 381381, i32 12>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
   br i1 %c, label %a, label %b
@@ -243,7 +243,7 @@ define <4 x i64> @phi_v4i64_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ <i64 1, i64 1, i64 1, i64 1>, %a ], [ <i64 2, i64 2, i64 2, i64 2>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ splat (i64 2), %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x
 ;
   br i1 %c, label %a, label %b
@@ -298,7 +298,7 @@ define <4 x i64> @phi_v4i64_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ <i64 1, i64 1, i64 1, i64 1>, %a ], [ <i64 7432, i64 -4832, i64 381381, i64 12>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x i64> [ splat (i64 1), %a ], [ <i64 7432, i64 -4832, i64 381381, i64 12>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %x
 ;
   br i1 %c, label %a, label %b
@@ -316,7 +316,7 @@ define <4 x half> @phi_v4f16_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %a ], [ <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4D00>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4D00>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x
 ;
   br i1 %c, label %a, label %b
@@ -353,7 +353,7 @@ define <4 x half> @phi_v4f16_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, %a ], [ <half 0xH6F42, half 0xHECB8, half 0xH5DF6, half 0xH4A40>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x half> [ splat (half 0xH3C00), %a ], [ <half 0xH6F42, half 0xHECB8, half 0xH5DF6, half 0xH4A40>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x half> %x
 ;
   br i1 %c, label %a, label %b
@@ -371,7 +371,7 @@ define <4 x float> @phi_v4f32_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %a ], [ <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+01>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+01>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x
 ;
   br i1 %c, label %a, label %b
@@ -408,7 +408,7 @@ define <4 x float> @phi_v4f32_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %a ], [ <float 7.432000e+03, float -4.832000e+03, float 3.815000e+02, float 1.250000e+01>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x float> [ splat (float 1.000000e+00), %a ], [ <float 7.432000e+03, float -4.832000e+03, float 3.815000e+02, float 1.250000e+01>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %x
 ;
   br i1 %c, label %a, label %b
@@ -426,7 +426,7 @@ define <4 x double> @phi_v4f64_splat(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %a ], [ <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+01>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+01>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x
 ;
   br i1 %c, label %a, label %b
@@ -463,7 +463,7 @@ define <4 x double> @phi_v4f64_cheap_and_expensive(i1 %c) {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br i1 %c, label %a, label %b
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: br label %d
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %a ], [ <double 7.432000e+03, double -4.832000e+03, double 3.815000e+02, double 1.250000e+01>, %b ]
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %x = phi <4 x double> [ splat (double 1.000000e+00), %a ], [ <double 7.432000e+03, double -4.832000e+03, double 3.815000e+02, double 1.250000e+01>, %b ]
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %x
 ;
   br i1 %c, label %a, label %b
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll
index 2bf1e5d26e2da9..a76de31e919097 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-select.ll
@@ -393,7 +393,7 @@ define void @select() {
 
 define void @select_of_constants() {
 ; CHECK-LABEL: 'select_of_constants'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> <i64 128, i64 128>, <2 x i64> zeroinitializer
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = select i1 undef, <2 x i64> splat (i64 128), <2 x i64> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %2 = select i1 undef, <2 x i64> <i64 128, i64 127>, <2 x i64> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %3 = select i1 undef, <2 x i64> <i64 0, i64 1>, <2 x i64> zeroinitializer
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %4 = select i1 undef, <2 x i64> <i64 128, i64 533>, <2 x i64> <i64 0, i64 573>
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
index b965a726262e68..f5b434881436ed 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll
@@ -130,46 +130,46 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; SSE1-LABEL: 'fneg_idiom'
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fneg_idiom'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fneg_idiom'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fneg_idiom'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
index c147bd2eef6e7e..74fecc6c4a5747 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll
@@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; SSE1-LABEL: 'fneg_idiom'
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fneg_idiom'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fneg_idiom'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'fneg_idiom'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'fneg_idiom'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fneg_idiom'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fneg_idiom'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fneg_idiom'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F32 = fsub float -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %F64 = fsub double -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
index d9312ac05601de..a801a187b46cbd 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll
@@ -196,79 +196,79 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; SSE1-LABEL: 'fneg_idiom'
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fneg_idiom'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fneg_idiom'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'fneg_idiom'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fneg_idiom'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fneg_idiom'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fneg_idiom'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
index 90871e3a3831c0..d26c856f270762 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll
@@ -218,90 +218,90 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; SSE1-LABEL: 'fneg_idiom'
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE2-LABEL: 'fneg_idiom'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'fneg_idiom'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'fneg_idiom'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'fneg_idiom'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'fneg_idiom'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'fneg_idiom'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'fneg_idiom'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %F32 = fsub float -0.0, undef
diff --git a/llvm/test/Analysis/CostModel/X86/div-codesize.ll b/llvm/test/Analysis/CostModel/X86/div-codesize.ll
index 801b5f4e9ec06e..88c4a5dc185037 100644
--- a/llvm/test/Analysis/CostModel/X86/div-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-codesize.ll
@@ -186,21 +186,21 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 7
@@ -548,97 +548,97 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; SSE-LABEL: 'udiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
@@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, -16
@@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/div-latency.ll b/llvm/test/Analysis/CostModel/X86/div-latency.ll
index 6afb9c790e3520..aada70124a9b47 100644
--- a/llvm/test/Analysis/CostModel/X86/div-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-latency.ll
@@ -186,21 +186,21 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 7
@@ -586,116 +586,116 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sdiv_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 101 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -724,116 +724,116 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; SSE-LABEL: 'udiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'udiv_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
@@ -948,21 +948,21 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, -16
@@ -991,21 +991,21 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
index d54dae0a21a066..efb853e3721b54 100644
--- a/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/div-sizelatency.ll
@@ -186,21 +186,21 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 7
@@ -548,97 +548,97 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 35 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -667,97 +667,97 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; SSE-LABEL: 'udiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
@@ -872,21 +872,21 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, -16
@@ -915,21 +915,21 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/div.ll b/llvm/test/Analysis/CostModel/X86/div.ll
index 4bbd70e87a1a95..8ffd6835004f91 100644
--- a/llvm/test/Analysis/CostModel/X86/div.ll
+++ b/llvm/test/Analysis/CostModel/X86/div.ll
@@ -376,97 +376,97 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; SSE-LABEL: 'sdiv_uniformconst'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconst'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconst'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconst'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconst'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 7
@@ -495,97 +495,97 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; SSE-LABEL: 'udiv_uniformconst'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconst'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconst'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconst'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconst'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 7
@@ -928,135 +928,135 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; SSE2-LABEL: 'sdiv_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'sdiv_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'sdiv_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, 16
@@ -1085,97 +1085,97 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; SSE-LABEL: 'udiv_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, 16
@@ -1480,97 +1480,97 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; SSE-LABEL: 'sdiv_uniformconstnegpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'sdiv_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'sdiv_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'sdiv_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'sdiv_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = sdiv i64 undef, -16
@@ -1599,97 +1599,97 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; SSE-LABEL: 'udiv_uniformconstnegpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'udiv_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'udiv_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'udiv_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'udiv_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = udiv i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll
index 71927002b599fd..fccabe39cd218f 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl-codesize.ll
@@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
@@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
@@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i16'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
@@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
@@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
@@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
@@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll
index c40394ba9a7283..a82a07d8ce54d9 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl-latency.ll
@@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
@@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
@@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
@@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
@@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
@@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
@@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll
index 7b0daf50485505..c5e99b36e87ab1 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl-sizelatency.ll
@@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
@@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
@@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i16'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
@@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
@@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
@@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
@@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshl.ll b/llvm/test/Analysis/CostModel/X86/fshl.ll
index 127dec0a1a6f78..0ee9fff1229e5c 100644
--- a/llvm/test/Analysis/CostModel/X86/fshl.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshl.ll
@@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %b64, i64 7)
@@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %b32, i32 5)
@@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %b16, i16 3)
@@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %b8, i8 3)
@@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshl.i64(i64 %a64, i64 %a64, i64 7)
@@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshl.i32(i32 %a32, i32 %a32, i32 5)
@@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshl.i16(i16 %a16, i16 %a16, i16 3)
@@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshl.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll
index 92a20b938142a7..0e4b635e863512 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr-codesize.ll
@@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
@@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
@@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i16'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
@@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
@@ -2602,51 +2602,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
@@ -2659,51 +2659,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
@@ -2716,79 +2716,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
@@ -2801,79 +2801,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll
index 33fadef536bf74..b71fd6c38fc7f3 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr-latency.ll
@@ -1217,86 +1217,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
@@ -1309,79 +1309,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
@@ -1394,79 +1394,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
@@ -1479,79 +1479,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 73 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
@@ -2554,51 +2554,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
@@ -2611,51 +2611,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
@@ -2668,79 +2668,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
@@ -2753,79 +2753,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 61 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll
index ef831328c480e7..9b848d42a6f288 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr-sizelatency.ll
@@ -1244,86 +1244,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
@@ -1336,86 +1336,86 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i32'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i32'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
@@ -1428,86 +1428,86 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i16'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i16'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
@@ -1520,86 +1520,86 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i8'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i8'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
@@ -2786,79 +2786,79 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
@@ -2871,79 +2871,79 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
@@ -2956,79 +2956,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
@@ -3041,79 +3041,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/fshr.ll b/llvm/test/Analysis/CostModel/X86/fshr.ll
index 3c233b51053ddc..2b970ba50c086e 100644
--- a/llvm/test/Analysis/CostModel/X86/fshr.ll
+++ b/llvm/test/Analysis/CostModel/X86/fshr.ll
@@ -1210,86 +1210,86 @@ define void @constant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512, i64 %b64, <2 x i64> %b128, <4 x i64> %b256, <8 x i64> %b512) {
 ; SSSE3-LABEL: 'splatconstant_funnel_i64'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SSE42-LABEL: 'splatconstant_funnel_i64'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i64'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i64'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i64'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i64'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i64'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> <i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %b128, <2 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %b256, <4 x i64> splat (i64 7))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %b512, <8 x i64> splat (i64 7))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %b64, i64 7)
@@ -1302,79 +1302,79 @@ define void @splatconstant_funnel_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512, i32 %b32, <4 x i32> %b128, <8 x i32> %b256, <16 x i32> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i32'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i32'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i32'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i32'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i32'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %b128, <4 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %b256, <8 x i32> splat (i32 5))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %b512, <16 x i32> splat (i32 5))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %b32, i32 5)
@@ -1387,79 +1387,79 @@ define void @splatconstant_funnel_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512, i16 %b16, <8 x i16> %b128, <16 x i16> %b256, <32 x i16> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %b128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %b256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %b512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %b16, i16 3)
@@ -1472,79 +1472,79 @@ define void @splatconstant_funnel_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_funnel_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512, i8 %b8, <16 x i8> %b128, <32 x i8> %b256, <64 x i8> %b512) {
 ; SSE-LABEL: 'splatconstant_funnel_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_funnel_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_funnel_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_funnel_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_funnel_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_funnel_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_funnel_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_funnel_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_funnel_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_funnel_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_funnel_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %b128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %b256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %b512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %b8, i8 3)
@@ -2540,51 +2540,51 @@ define void @constant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x
 define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256, <8 x i64> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i64'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i64'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i64'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i64'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i64'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i64'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i64'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> <i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> <i64 7, i64 7, i64 7, i64 7>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a128, <2 x i64> %a128, <2 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %a256, <4 x i64> %a256, <4 x i64> splat (i64 7))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %a512, <8 x i64> %a512, <8 x i64> splat (i64 7))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I64    = call i64 @llvm.fshr.i64(i64 %a64, i64 %a64, i64 7)
@@ -2597,51 +2597,51 @@ define void @splatconstant_rotate_i64(i64 %a64, <2 x i64> %a128, <4 x i64> %a256
 define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256, <16 x i32> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i32'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i32'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i32'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'splatconstant_rotate_i32'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i32'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i32'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i32'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> <i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a128, <4 x i32> %a128, <4 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %a256, <8 x i32> %a256, <8 x i32> splat (i32 5))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %a512, <16 x i32> %a512, <16 x i32> splat (i32 5))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I32   = call i32 @llvm.fshr.i32(i32 %a32, i32 %a32, i32 5)
@@ -2654,79 +2654,79 @@ define void @splatconstant_rotate_i32(i32 %a32, <4 x i32> %a128, <8 x i32> %a256
 define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a256, <32 x i16> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i16'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i16'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i16'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i16'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i16'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i16'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i16'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i16'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i16'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i16'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i16'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a128, <8 x i16> %a128, <8 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %a256, <16 x i16> %a256, <16 x i16> splat (i16 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %a512, <32 x i16> %a512, <32 x i16> splat (i16 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I16    = call i16 @llvm.fshr.i16(i16 %a16, i16 %a16, i16 3)
@@ -2739,79 +2739,79 @@ define void @splatconstant_rotate_i16(i16 %a16, <8 x i16> %a128, <16 x i16> %a25
 define void @splatconstant_rotate_i8(i8 %a8, <16 x i8> %a128, <32 x i8> %a256, <64 x i8> %a512) {
 ; SSE-LABEL: 'splatconstant_rotate_i8'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX1-LABEL: 'splatconstant_rotate_i8'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX2-LABEL: 'splatconstant_rotate_i8'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512F-LABEL: 'splatconstant_rotate_i8'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512BW-LABEL: 'splatconstant_rotate_i8'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512DQ-LABEL: 'splatconstant_rotate_i8'
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512DQ-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512VBMI2-LABEL: 'splatconstant_rotate_i8'
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512VBMI2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; SLM-LABEL: 'splatconstant_rotate_i8'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; GLM-LABEL: 'splatconstant_rotate_i8'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; GLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; XOP-LABEL: 'splatconstant_rotate_i8'
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; XOP-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_rotate_i8'
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a128, <16 x i8> %a128, <16 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.fshr.v32i8(<32 x i8> %a256, <32 x i8> %a256, <32 x i8> splat (i8 3))
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %a512, <64 x i8> %a512, <64 x i8> splat (i8 3))
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %I8    = call i8 @llvm.fshr.i8(i8 %a8, i8 %a8, i8 3)
diff --git a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll
index 4cf1df14387f86..75585cf0a6e85c 100644
--- a/llvm/test/Analysis/CostModel/X86/mul-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul-codesize.ll
@@ -190,154 +190,154 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SSE2-LABEL: 'mul_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, 16
@@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SSE2-LABEL: 'mul_uniformconstnegpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstnegpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstnegpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstnegpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/mul-latency.ll b/llvm/test/Analysis/CostModel/X86/mul-latency.ll
index 176c3ed41bfa8e..9245f2c5740f36 100644
--- a/llvm/test/Analysis/CostModel/X86/mul-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul-latency.ll
@@ -190,154 +190,154 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SSE2-LABEL: 'mul_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, 16
@@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SSE2-LABEL: 'mul_uniformconstnegpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstnegpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstnegpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstnegpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
index 020b62291012cc..2d1bf23e9699c9 100644
--- a/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul-sizelatency.ll
@@ -190,154 +190,154 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SSE2-LABEL: 'mul_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, 16
@@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SSE2-LABEL: 'mul_uniformconstnegpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstnegpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstnegpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstnegpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/mul.ll b/llvm/test/Analysis/CostModel/X86/mul.ll
index 7e51dbf60cf9b3..4602928a981eac 100644
--- a/llvm/test/Analysis/CostModel/X86/mul.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul.ll
@@ -190,154 +190,154 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SSE2-LABEL: 'mul_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, 16
@@ -542,154 +542,154 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SSE2-LABEL: 'mul_uniformconstnegpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'mul_uniformconstnegpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'mul_uniformconstnegpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'mul_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'mul_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'mul_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'mul_uniformconstnegpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I64 = mul i64 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = mul i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
index b47580ba0abfd1..3c172dda6867b6 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-codesize.ll
@@ -186,21 +186,21 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -529,154 +529,154 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'srem_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 49 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 98 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 59 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 118 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 90 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -705,59 +705,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; SSE-LABEL: 'urem_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'urem_uniformconstpow2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'urem_uniformconstpow2'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -872,21 +872,21 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -915,21 +915,21 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/rem-latency.ll b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
index 5264a75659d150..6c62ccb8df7188 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-latency.ll
@@ -186,21 +186,21 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -510,135 +510,135 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 47 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 39 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 148 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I32 = srem i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 58 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I16 = srem i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 45 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 174 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; SSE-LABEL: 'urem_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'urem_uniformconstpow2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'urem_uniformconstpow2'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
index 817a0c7e070ac6..e0f0674c71f604 100644
--- a/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem-sizelatency.ll
@@ -186,21 +186,21 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -229,21 +229,21 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -510,135 +510,135 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 55 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 37 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 74 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 65 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 130 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 17 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 84 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 110 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -667,59 +667,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; SSE-LABEL: 'urem_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'urem_uniformconstpow2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'urem_uniformconstpow2'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -834,21 +834,21 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -877,21 +877,21 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/rem.ll b/llvm/test/Analysis/CostModel/X86/rem.ll
index 0e799d63741aa8..4138046ff961ed 100644
--- a/llvm/test/Analysis/CostModel/X86/rem.ll
+++ b/llvm/test/Analysis/CostModel/X86/rem.ll
@@ -490,97 +490,97 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; SSE-LABEL: 'srem_uniformconst'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconst'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconst'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconst'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconst'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 7
@@ -609,97 +609,97 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; SSE-LABEL: 'urem_uniformconst'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'urem_uniformconst'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'urem_uniformconst'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_uniformconst'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_uniformconst'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, 7
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 7
@@ -1004,173 +1004,173 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; SSE2-LABEL: 'srem_uniformconstpow2'
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSSE3-LABEL: 'srem_uniformconstpow2'
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SSE42-LABEL: 'srem_uniformconstpow2'
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 33 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 11 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; SLM-LABEL: 'srem_uniformconstpow2'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; GLM-LABEL: 'srem_uniformconstpow2'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %I8 = srem i8 undef, 16
-; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; GLM-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, 16
@@ -1199,59 +1199,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; SSE-LABEL: 'urem_uniformconstpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX-LABEL: 'urem_uniformconstpow2'
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512-LABEL: 'urem_uniformconstpow2'
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, 16
@@ -1594,97 +1594,97 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; SSE-LABEL: 'srem_uniformconstnegpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'srem_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'srem_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'srem_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'srem_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = srem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = srem i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = srem i64 undef, -16
@@ -1713,97 +1713,97 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; SSE-LABEL: 'urem_uniformconstnegpow2'
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16
-; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; SSE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX1-LABEL: 'urem_uniformconstnegpow2'
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX2-LABEL: 'urem_uniformconstnegpow2'
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 640 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512F-LABEL: 'urem_uniformconstnegpow2'
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; AVX512BW-LABEL: 'urem_uniformconstnegpow2'
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I64 = urem i64 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 120 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 240 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = urem i8 undef, -16
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %I64 = urem i64 undef, -16
diff --git a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
index 204290bd4e40f2..39a74cf6509bcd 100644
--- a/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
+++ b/llvm/test/Analysis/CostModel/X86/slm-arith-costs.ll
@@ -77,12 +77,12 @@ entry:
 define <4 x i32> @slm-costs_8_v4_zext_mul(<4 x i8> %a)  {
 ; SLM-LABEL: 'slm-costs_8_v4_zext_mul'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; GLM-LABEL: 'slm-costs_8_v4_zext_mul'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i8> %a to <4 x i32>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, <i32 255, i32 255, i32 255, i32 255>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 255)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 entry:
@@ -242,12 +242,12 @@ entry:
 define <4 x i32> @slm-costs_16_v4_zext_mul(<4 x i16> %a)  {
 ; SLM-LABEL: 'slm-costs_16_v4_zext_mul'
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32>
-; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, <i32 65535, i32 65535, i32 65535, i32 65535>
+; SLM-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535)
 ; SLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 ; GLM-LABEL: 'slm-costs_16_v4_zext_mul'
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %zext = zext <4 x i16> %a to <4 x i32>
-; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, <i32 65535, i32 65535, i32 65535, i32 65535>
+; GLM-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %res = mul nsw <4 x i32> %zext, splat (i32 65535)
 ; GLM-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res
 ;
 entry:
diff --git a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
index 0986def7cef40d..495cc74b643c49 100644
--- a/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vdiv-cost.ll
@@ -10,7 +10,7 @@
 
 define <4 x i32> @test1(<4 x i32> %a) {
 ; CHECK-LABEL: 'test1'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <4 x i32> %a, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = udiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
@@ -19,19 +19,19 @@ define <4 x i32> @test1(<4 x i32> %a) {
 
 define <8 x i32> @test2(<8 x i32> %a) {
 ; SSE-LABEL: 'test2'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %div = udiv <8 x i32> %a, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX1-LABEL: 'test2'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <8 x i32> %a, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX2-LABEL: 'test2'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX512-LABEL: 'test2'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %div = udiv <8 x i32> %a, splat (i32 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
   %div = udiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
@@ -40,7 +40,7 @@ define <8 x i32> @test2(<8 x i32> %a) {
 
 define <8 x i16> @test3(<8 x i16> %a) {
 ; CHECK-LABEL: 'test3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <8 x i16> %a, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = udiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -49,19 +49,19 @@ define <8 x i16> @test3(<8 x i16> %a) {
 
 define <16 x i16> @test4(<16 x i16> %a) {
 ; SSE-LABEL: 'test4'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = udiv <16 x i16> %a, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX1-LABEL: 'test4'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = udiv <16 x i16> %a, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX2-LABEL: 'test4'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX512-LABEL: 'test4'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = udiv <16 x i16> %a, splat (i16 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
   %div = udiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
@@ -70,7 +70,7 @@ define <16 x i16> @test4(<16 x i16> %a) {
 
 define <8 x i16> @test5(<8 x i16> %a) {
 ; CHECK-LABEL: 'test5'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i16> %a, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
   %div = sdiv <8 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
@@ -79,19 +79,19 @@ define <8 x i16> @test5(<8 x i16> %a) {
 
 define <16 x i16> @test6(<16 x i16> %a) {
 ; SSE-LABEL: 'test6'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX1-LABEL: 'test6'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX2-LABEL: 'test6'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
 ; AVX512-LABEL: 'test6'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <16 x i16> %a, splat (i16 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %div
 ;
   %div = sdiv <16 x i16> %a, <i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7,i16 7, i16 7, i16 7, i16 7>
@@ -100,7 +100,7 @@ define <16 x i16> @test6(<16 x i16> %a) {
 
 define <16 x i8> @test7(<16 x i8> %a) {
 ; CHECK-LABEL: 'test7'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <16 x i8> %a, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
   %div = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -109,7 +109,7 @@ define <16 x i8> @test7(<16 x i8> %a) {
 
 define <4 x i32> @test8(<4 x i32> %a) {
 ; CHECK-LABEL: 'test8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <4 x i32> %a, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
   %div = sdiv <4 x i32> %a, <i32 7, i32 7, i32 7, i32 7>
@@ -118,19 +118,19 @@ define <4 x i32> @test8(<4 x i32> %a) {
 
 define <8 x i32> @test9(<8 x i32> %a) {
 ; SSE-LABEL: 'test9'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX1-LABEL: 'test9'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX2-LABEL: 'test9'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
 ; AVX512-LABEL: 'test9'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %div = sdiv <8 x i32> %a, splat (i32 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %div
 ;
   %div = sdiv <8 x i32> %a, <i32 7, i32 7, i32 7, i32 7,i32 7, i32 7, i32 7, i32 7>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
index 9ff975665f13b6..195c61921219f0 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-codesize.ll
@@ -1391,23 +1391,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
@@ -1416,31 +1416,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1449,31 +1449,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1482,7 +1482,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1491,31 +1491,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1524,31 +1524,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1557,7 +1557,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1566,39 +1566,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1607,39 +1607,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1648,35 +1648,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1685,35 +1685,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v32i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1722,35 +1722,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v64i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
index 67679f2cb85666..a3e4cf62764208 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost-inseltpoison.ll
@@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
@@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
index efd378ee0b5a61..1bdab183e548c5 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-cost.ll
@@ -1509,19 +1509,19 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
@@ -1530,27 +1530,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1559,27 +1559,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1588,7 +1588,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1597,27 +1597,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1626,27 +1626,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1655,7 +1655,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1664,43 +1664,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1709,43 +1709,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1754,43 +1754,43 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1799,43 +1799,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1844,43 +1844,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
index ab300779b4341c..f96d0d4ef43cc2 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-latency.ll
@@ -1469,23 +1469,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
@@ -1494,31 +1494,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1527,31 +1527,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1560,31 +1560,31 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1593,31 +1593,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1626,31 +1626,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1659,31 +1659,31 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1692,39 +1692,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1733,39 +1733,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1774,39 +1774,39 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1815,39 +1815,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1856,39 +1856,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
index 1b51a2e0a1e6e5..c85f506d6b60ca 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-ashr-sizelatency.ll
@@ -1415,23 +1415,23 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v2i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v2i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v2i64'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v2i64'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = ashr <2 x i64> %a, <i64 7, i64 7>
@@ -1440,31 +1440,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = ashr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1473,31 +1473,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = ashr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1506,7 +1506,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = ashr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1515,31 +1515,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = ashr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1548,31 +1548,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = ashr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1581,7 +1581,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1590,39 +1590,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = ashr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1631,39 +1631,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = ashr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1672,35 +1672,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1709,39 +1709,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = ashr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1750,39 +1750,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = ashr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
index 644fcbbfefdf95..dc6feadbbe17ad 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-codesize.ll
@@ -1359,7 +1359,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -1368,31 +1368,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1401,31 +1401,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1434,7 +1434,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1443,31 +1443,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1476,31 +1476,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1509,7 +1509,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1518,39 +1518,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1559,39 +1559,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1600,35 +1600,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1637,39 +1637,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1678,39 +1678,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
index 34ea1a644f6cca..348ffebf220e0a 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost-inseltpoison.ll
@@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
index 93b844eddf1824..7a7187dc9d6429 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-cost.ll
@@ -1477,7 +1477,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -1486,27 +1486,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1515,27 +1515,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1544,7 +1544,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1553,27 +1553,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1582,27 +1582,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1611,7 +1611,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1620,43 +1620,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1665,43 +1665,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1710,23 +1710,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1735,43 +1735,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1780,43 +1780,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
index f879a09d067e65..b9accec482622d 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-latency.ll
@@ -1473,27 +1473,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -1502,27 +1502,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1531,27 +1531,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1560,27 +1560,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1589,27 +1589,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1618,27 +1618,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1647,27 +1647,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1676,35 +1676,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1713,35 +1713,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1750,35 +1750,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1787,35 +1787,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1824,35 +1824,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
index fe472342e21445..87c0aee4a564de 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-lshr-sizelatency.ll
@@ -1375,7 +1375,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = lshr <2 x i64> %a, <i64 7, i64 7>
@@ -1384,31 +1384,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = lshr <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1417,31 +1417,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = lshr <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1450,7 +1450,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = lshr <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1459,31 +1459,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = lshr <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1492,31 +1492,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = lshr <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1525,7 +1525,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1534,39 +1534,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = lshr <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1575,39 +1575,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = lshr <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1616,35 +1616,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1653,39 +1653,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = lshr <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1694,39 +1694,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = lshr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
index 1045b827da7cad..2cf49533438f22 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-codesize.ll
@@ -1333,7 +1333,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -1342,31 +1342,31 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v4i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v4i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1375,31 +1375,31 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i64'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i64'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1408,7 +1408,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1417,31 +1417,31 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v8i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v8i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1450,31 +1450,31 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i32'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i32'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1483,7 +1483,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1492,39 +1492,39 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1533,39 +1533,39 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i16'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i16'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1574,35 +1574,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v16i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v16i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1611,39 +1611,39 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v32i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v32i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1652,39 +1652,39 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE2-LABEL: 'splatconstant_shift_v64i8'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; SSE42-LABEL: 'splatconstant_shift_v64i8'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
index f093d1c8ca358a..0d4cd2648d8058 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost-inseltpoison.ll
@@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
index 09521cfa2d23d3..c2cbc32e328ecb 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-cost.ll
@@ -1557,7 +1557,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -1566,27 +1566,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1595,27 +1595,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1624,7 +1624,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1633,27 +1633,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1662,27 +1662,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1691,7 +1691,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1700,43 +1700,43 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1745,43 +1745,43 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1790,23 +1790,23 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; XOP-LABEL: 'splatconstant_shift_v16i8'
-; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i8'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1815,43 +1815,43 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1860,43 +1860,43 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512VL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512VL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BWVL-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BWVL-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
index 3ae71daf50a306..18fd15031d28aa 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-latency.ll
@@ -1433,27 +1433,27 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v2i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v2i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v2i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v2i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v2i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v2i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -1462,27 +1462,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1491,27 +1491,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1520,27 +1520,27 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1549,27 +1549,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1578,27 +1578,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1607,27 +1607,27 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i16'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1636,35 +1636,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1673,35 +1673,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1710,35 +1710,35 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1747,35 +1747,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1784,35 +1784,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
index 4256a73a7cf735..ad2d92681ffa9c 100644
--- a/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
+++ b/llvm/test/Analysis/CostModel/X86/vshift-shl-sizelatency.ll
@@ -1459,7 +1459,7 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) {
 
 define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v2i64'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, <i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <2 x i64> %a, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %shift
 ;
   %shift = shl <2 x i64> %a, <i64 7, i64 7>
@@ -1468,27 +1468,27 @@ define <2 x i64> @splatconstant_shift_v2i64(<2 x i64> %a) {
 
 define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v4i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v4i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v4i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v4i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v4i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %shift
 ;
   %shift = shl <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
@@ -1497,27 +1497,27 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) {
 
 define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i64'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i64'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i64'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i64'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i64'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i64> %a, splat (i64 7)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %shift
 ;
   %shift = shl <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
@@ -1526,7 +1526,7 @@ define <8 x i64> @splatconstant_shift_v8i64(<8 x i64> %a) {
 
 define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v4i32'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <4 x i32> %a, splat (i32 5)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %shift
 ;
   %shift = shl <4 x i32> %a, <i32 5, i32 5, i32 5, i32 5>
@@ -1535,27 +1535,27 @@ define <4 x i32> @splatconstant_shift_v4i32(<4 x i32> %a) {
 
 define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v8i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v8i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v8i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v8i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v8i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %shift
 ;
   %shift = shl <8 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1564,27 +1564,27 @@ define <8 x i32> @splatconstant_shift_v8i32(<8 x i32> %a) {
 
 define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i32'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i32'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i32'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i32'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
 ; AVX512-LABEL: 'splatconstant_shift_v16i32'
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i32> %a, splat (i32 5)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %shift
 ;
   %shift = shl <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -1593,7 +1593,7 @@ define <16 x i32> @splatconstant_shift_v16i32(<16 x i32> %a) {
 
 define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 ; CHECK-LABEL: 'splatconstant_shift_v8i16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <8 x i16> %a, splat (i16 3)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %shift
 ;
   %shift = shl <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1602,35 +1602,35 @@ define <8 x i16> @splatconstant_shift_v8i16(<8 x i16> %a) {
 
 define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v16i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v16i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %shift
 ;
   %shift = shl <16 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1639,35 +1639,35 @@ define <16 x i16> @splatconstant_shift_v16i16(<16 x i16> %a) {
 
 define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i16'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i16'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i16'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i16'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i16'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i16'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i16'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <32 x i16> %a, splat (i16 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %shift
 ;
   %shift = shl <32 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -1676,31 +1676,31 @@ define <32 x i16> @splatconstant_shift_v32i16(<32 x i16> %a) {
 
 define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v16i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX-LABEL: 'splatconstant_shift_v16i8'
-; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v16i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v16i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v16i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v16i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <16 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %shift
 ;
   %shift = shl <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1709,35 +1709,35 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) {
 
 define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v32i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v32i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v32i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v32i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v32i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v32i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v32i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <32 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i8> %shift
 ;
   %shift = shl <32 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
@@ -1746,35 +1746,35 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) {
 
 define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) {
 ; SSE-LABEL: 'splatconstant_shift_v64i8'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX1-LABEL: 'splatconstant_shift_v64i8'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX2-LABEL: 'splatconstant_shift_v64i8'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX1-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; XOPAVX2-LABEL: 'splatconstant_shift_v64i8'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512F-LABEL: 'splatconstant_shift_v64i8'
-; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512F-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512F-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512BW-LABEL: 'splatconstant_shift_v64i8'
-; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512BW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
 ; AVX512GFNI-LABEL: 'splatconstant_shift_v64i8'
-; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <64 x i8> %a, splat (i8 3)
 ; AVX512GFNI-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret <64 x i8> %shift
 ;
   %shift = shl <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
index db2c4f3a1ed650..138cc9672a51e9 100644
--- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll
+++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
@@ -210,7 +210,7 @@ define i1 @shl_nz_bounded_cnt(i32 %cnt, i32 %y) {
 define <2 x i1> @shl_nz_bounded_cnt_vec_todo_no_common_bit(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @shl_nz_bounded_cnt_vec_todo_no_common_bit(
 ; CHECK-NEXT:    [[CNT:%.*]] = and <2 x i32> [[X:%.*]], <i32 16, i32 32>
-; CHECK-NEXT:    [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[VAL]], [[CNT]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -315,7 +315,7 @@ define i1 @lshr_nz_bounded_cnt_fail(i32 %cnt, i32 %y) {
 
 define <2 x i1> @ashr_nz_bounded_cnt_vec_fail(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @ashr_nz_bounded_cnt_vec_fail(
-; CHECK-NEXT:    [[CNT:%.*]] = and <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    [[CNT:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 24)
 ; CHECK-NEXT:    [[VAL:%.*]] = or <2 x i32> [[Y:%.*]], <i32 131088, i32 268697601>
 ; CHECK-NEXT:    [[SHL:%.*]] = ashr <2 x i32> [[VAL]], [[CNT]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i32> [[SHL]], zeroinitializer
@@ -663,7 +663,7 @@ define <3 x i1> @bitcast_veci3_to_veci4_fail_not_multiple(<4 x i3> %xx, <3 x i4>
 
 define <4 x i1> @bitcast_fail_veci16_to_veci8(<2 x i16> %xx, <4 x i8> %ind) {
 ; CHECK-LABEL: @bitcast_fail_veci16_to_veci8(
-; CHECK-NEXT:    [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], <i16 1, i16 1>
+; CHECK-NEXT:    [[XA:%.*]] = add nuw nsw <2 x i16> [[XX:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[X:%.*]] = bitcast <2 x i16> [[XA]] to <4 x i8>
 ; CHECK-NEXT:    [[Z:%.*]] = or <4 x i8> [[X]], [[IND:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i8> [[Z]], zeroinitializer
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll
index fba907ab731b0b..f272d9009aed13 100644
--- a/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll
+++ b/llvm/test/Analysis/ValueTracking/knownbits-and-or-xor-lowbit.ll
@@ -91,10 +91,10 @@ define <2 x i1> @add_XY_or_bit0_is_one(<2 x i8> %x, <2 x i8> %C) nounwind {
 
 define <2 x i1> @sub_XY_and_bit0_is_zero_fail(<2 x i8> %x, <2 x i8> %C) nounwind {
 ; CHECK-LABEL: @sub_XY_and_bit0_is_zero_fail(
-; CHECK-NEXT:    [[C1:%.*]] = or <2 x i8> [[C:%.*]], <i8 8, i8 8>
+; CHECK-NEXT:    [[C1:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 8)
 ; CHECK-NEXT:    [[Y:%.*]] = sub <2 x i8> [[X:%.*]], [[C1]]
 ; CHECK-NEXT:    [[W:%.*]] = and <2 x i8> [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[W]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %C1 = or <2 x i8> %C, <i8 8, i8 8>
@@ -149,7 +149,7 @@ define <2 x i1> @sub_YX_xor_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind
 ; CHECK-LABEL: @sub_YX_xor_bit0_is_one_fail(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> [[X:%.*]], [[C:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i8> [[X]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], <i8 -13, i8 -13>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP2]], splat (i8 -13)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %C1 = sub <2 x i8> %C, <i8 1, i8 1>
@@ -191,10 +191,10 @@ define i1 @add_YX_xor_bit0_is_one_fail(i8 %x, i8 %C) nounwind {
 
 define <2 x i1> @add_XY_or_bit0_is_one_fail(<2 x i8> %x, <2 x i8> %C) nounwind {
 ; CHECK-LABEL: @add_XY_or_bit0_is_one_fail(
-; CHECK-NEXT:    [[C1:%.*]] = add <2 x i8> [[C:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[C1:%.*]] = add <2 x i8> [[C:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[Y:%.*]] = add <2 x i8> [[C1]], [[X:%.*]]
 ; CHECK-NEXT:    [[W:%.*]] = or <2 x i8> [[X]], [[Y]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[W]], <i8 90, i8 90>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[W]], splat (i8 90)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %C1 = add <2 x i8> %C, <i8 1, i8 1>
@@ -219,7 +219,7 @@ define <2 x i32> @add_and_eval_vec(<2 x i32> %x, <2 x i32> %C) {
 
 define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @add_xor_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %y = add <2 x i32> %x, <i32 1, i32 1>
   %z = xor <2 x i32> %y, %x
@@ -229,7 +229,7 @@ define <2 x i32> @add_xor_eval_vec(<2 x i32> %x) {
 
 define <2 x i32> @add_or_eval_vec(<2 x i32> %x, <2 x i32> %C) {
 ; CHECK-LABEL: @add_or_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %y = add <2 x i32> %x, <i32 1, i32 1>
   %z = or <2 x i32> %y, %x
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
index 407100dec1201e..a83abe87cde88d 100644
--- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
+++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll
@@ -82,7 +82,7 @@ define i32 @blsmsk_add_eval(i32 %x) {
 
 define <2 x i32> @blsmsk_add_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_add_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 33, i32 33>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 33)
 ;
   %x1 = or <2 x i32> %x, <i32 9, i32 9>
   %x2 = add <2 x i32> %x1, <i32 -1, i32 -1>
@@ -115,7 +115,7 @@ define i32 @blsmsk_or_eval(i32 %x) {
 
 define <2 x i32> @blsmsk_or_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_or_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 33, i32 33>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 33)
 ;
   %x1 = or <2 x i32> %x, <i32 9, i32 9>
   %x2 = add <2 x i32> %x1, <i32 -1, i32 -1>
@@ -174,8 +174,8 @@ define i32 @blsmsk_and_eval2(i32 %x) {
 
 define <2 x i32> @blsmsk_and_eval3_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_and_eval3_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 34, i32 34>
-; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X1]], <i32 63, i32 63>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34)
+; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X1]], splat (i32 63)
 ; CHECK-NEXT:    [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]]
 ; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> [[X3]], <i32 16, i32 32>
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
@@ -242,9 +242,9 @@ define <2 x i32> @blsmsk_add_eval_assume_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP0]])
 ; CHECK-NEXT:    [[CMP1:%.*]] = extractelement <2 x i1> [[CMP]], i64 1
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP1]])
-; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X]], splat (i32 -1)
 ; CHECK-NEXT:    [[X3:%.*]] = xor <2 x i32> [[X2]], [[X]]
-; CHECK-NEXT:    [[Z:%.*]] = add <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %lb = and <2 x i32> %x, <i32 1, i32 1>
@@ -335,10 +335,10 @@ define i1 @blsi_ne_is_true(i32 %x) {
 
 define <2 x i1> @blsi_ge_is_false_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_ge_is_false_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10)
 ; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], <i32 7, i32 7>
+; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 10, i32 10>
@@ -353,7 +353,7 @@ define <2 x i1> @blsi_ge_is_false_diff_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 10, i32 11>
 ; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], <i32 7, i32 7>
+; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i32> [[X3]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 10, i32 11>
@@ -399,7 +399,7 @@ define i32 @blsi_sub_eval(i32 %x) {
 
 define <2 x i32> @blsi_sub_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_sub_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 -31, i32 -31>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 -31)
 ;
   %x1 = or <2 x i32> %x, <i32 33, i32 33>
   %x2 = sub <2 x i32> <i32 0, i32 0>, %x1
@@ -421,7 +421,7 @@ define i32 @blsi_or_eval(i32 %x) {
 
 define <2 x i32> @blsi_xor_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_xor_eval_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 33, i32 33>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 33)
 ;
   %x1 = or <2 x i32> %x, <i32 33, i32 33>
   %x2 = sub <2 x i32> <i32 0, i32 0>, %x1
@@ -443,10 +443,10 @@ define i32 @blsi_and_eval(i32 %x) {
 
 define <2 x i32> @blsi_and_eval2_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_and_eval2_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 30, i32 30>
@@ -473,7 +473,7 @@ define i32 @blsi_and_eval3(i32 %x) {
 
 define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_eq_is_false_assume_vec(
-; CHECK-NEXT:    [[LB:%.*]] = and <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[LB:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[LB]], zeroinitializer
 ; CHECK-NEXT:    [[CMP0:%.*]] = extractelement <2 x i1> [[CMP]], i64 0
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP0]])
@@ -481,7 +481,7 @@ define <2 x i1> @blsi_eq_is_false_assume_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP1]])
 ; CHECK-NEXT:    [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i32> [[X3]], <i32 8, i32 8>
+; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 8)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %lb = and <2 x i32> %x, <i32 2, i32 2>
@@ -533,7 +533,7 @@ define <2 x i1> @blsi_cmp_eq_diff_bits_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 2>
 ; CHECK-NEXT:    [[X2:%.*]] = sub <2 x i32> zeroinitializer, [[X1]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 1, i32 2>
@@ -577,10 +577,10 @@ define i32 @blsi_and_eval_assume(i32 %x) {
 
 define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_ne_no_proof_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 10, i32 10>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 10)
 ; CHECK-NEXT:    [[X2:%.*]] = add nsw <2 x i32> [[X1]], <i32 -2, i32 -3>
 ; CHECK-NEXT:    [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp ne <2 x i32> [[X3]], <i32 8, i32 8>
+; CHECK-NEXT:    [[Z:%.*]] = icmp ne <2 x i32> [[X3]], splat (i32 8)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 10, i32 10>
@@ -592,10 +592,10 @@ define <2 x i1> @blsmsk_ne_no_proof_vec(<2 x i32> %x) {
 
 define <2 x i32> @blsmsk_add_noeval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_add_noeval_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 9, i32 9>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 9)
 ; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X1]], <i32 1, i32 -2>
 ; CHECK-NEXT:    [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]]
-; CHECK-NEXT:    [[Z:%.*]] = add <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = add <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 9, i32 9>
@@ -649,10 +649,10 @@ define i32 @blsmsk_add_no_eval2(i32 %x) {
 
 define <2 x i32> @blsmsk_xor_no_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsmsk_xor_no_eval_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 34, i32 34>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 34)
 ; CHECK-NEXT:    [[X2:%.*]] = add <2 x i32> [[X1]], <i32 -9, i32 1>
 ; CHECK-NEXT:    [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]]
-; CHECK-NEXT:    [[Z:%.*]] = xor <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = xor <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 34, i32 34>
@@ -733,10 +733,10 @@ define i32 @blsi_or_no_eval(i32 %x) {
 
 define <2 x i32> @blsi_or_no_partial_eval_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @blsi_or_no_partial_eval_vec(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    [[X2:%.*]] = sub nsw <2 x i32> <i32 0, i32 1>, [[X1]]
 ; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]]
-; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], <i32 32, i32 32>
+; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], splat (i32 32)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %x1 = or <2 x i32> %x, <i32 30, i32 30>
diff --git a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll
index e2fe873d715cd6..5f2c0ed765cd1f 100644
--- a/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll
+++ b/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll
@@ -218,10 +218,10 @@ define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[RET]]
 ;
 entry:
@@ -237,11 +237,11 @@ define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32(
 ; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 -1, i32 -1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[RET]]
 ;
 entry:
diff --git a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll
index d4ed849c231f8f..f5be5c909859b0 100644
--- a/llvm/test/Analysis/ValueTracking/knownzero-shift.ll
+++ b/llvm/test/Analysis/ValueTracking/knownzero-shift.ll
@@ -67,8 +67,8 @@ define i232 @lshr_lshr(i232 %A) {
 
 define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @lshr_lshr_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 28, i32 28>
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 4>
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 28)
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = lshr <2 x i32> %A, <i32 28, i32 28>
diff --git a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll
index e86d28ebbc1d26..5224d75a157d5b 100644
--- a/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll
+++ b/llvm/test/Analysis/ValueTracking/numsignbits-shl.ll
@@ -124,16 +124,16 @@ define void @numsignbits_shl_zext_all_bits_shifted_out(i8 %x) {
 define void @numsignbits_shl_zext_vector(<2 x i8> %x) {
 ; CHECK-LABEL: define void @numsignbits_shl_zext_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i8> [[X]], <i8 5, i8 5>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i8> [[X]], splat (i8 5)
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[ASHR]] to <2 x i16>
-; CHECK-NEXT:    [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], <i16 10, i16 10>
-; CHECK-NEXT:    [[ADD14:%.*]] = and <2 x i16> [[NSB4]], <i16 15360, i16 15360>
+; CHECK-NEXT:    [[NSB4:%.*]] = shl <2 x i16> [[ZEXT]], splat (i16 10)
+; CHECK-NEXT:    [[ADD14:%.*]] = and <2 x i16> [[NSB4]], splat (i16 15360)
 ; CHECK-NEXT:    call void @escape2(<2 x i16> [[ADD14]])
-; CHECK-NEXT:    [[ADD13:%.*]] = and <2 x i16> [[NSB4]], <i16 7168, i16 7168>
+; CHECK-NEXT:    [[ADD13:%.*]] = and <2 x i16> [[NSB4]], splat (i16 7168)
 ; CHECK-NEXT:    call void @escape2(<2 x i16> [[ADD13]])
-; CHECK-NEXT:    [[ADD12:%.*]] = and <2 x i16> [[NSB4]], <i16 3072, i16 3072>
+; CHECK-NEXT:    [[ADD12:%.*]] = and <2 x i16> [[NSB4]], splat (i16 3072)
 ; CHECK-NEXT:    call void @escape2(<2 x i16> [[ADD12]])
-; CHECK-NEXT:    [[AND11:%.*]] = and <2 x i16> [[NSB4]], <i16 2048, i16 2048>
+; CHECK-NEXT:    [[AND11:%.*]] = and <2 x i16> [[NSB4]], splat (i16 2048)
 ; CHECK-NEXT:    [[ADD11:%.*]] = add nsw <2 x i16> [[AND11]], [[NSB4]]
 ; CHECK-NEXT:    call void @escape2(<2 x i16> [[ADD11]])
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Assembler/ConstantExprFold.ll b/llvm/test/Assembler/ConstantExprFold.ll
index 44a7511ea568b0..944e6f0236ecbf 100644
--- a/llvm/test/Assembler/ConstantExprFold.ll
+++ b/llvm/test/Assembler/ConstantExprFold.ll
@@ -41,8 +41,8 @@
 ; CHECK: @gep2 = global <2 x ptr> undef
 ; CHECK: @gep3 = global <2 x ptr> zeroinitializer
 ; CHECK: @gep4 = global <2 x ptr> zeroinitializer
-; CHECK: @bitcast1 = global <2 x i32> <i32 -1, i32 -1>
-; CHECK: @bitcast2 = global <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK: @bitcast1 = global <2 x i32> splat (i32 -1)
+; CHECK: @bitcast2 = global <4 x i16> splat (i16 -1)
 ;.
 define void @dummy() {
 ; CHECK-LABEL: @dummy(
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index b69afa3ab1f3d6..7aaf599583c801 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -1357,17 +1357,17 @@ define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x fl
 define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) {
 ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(
 ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8
-; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath [[META1:![0-9]+]]
+; CHECK-NEXT:    [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]]
 ; CHECK-NEXT:    store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8
-; CHECK-NEXT:    [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8
-; CHECK-NEXT:    [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8
-; CHECK-NEXT:    [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath [[META0]]
+; CHECK-NEXT:    [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]]
 ; CHECK-NEXT:    store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8
-; CHECK-NEXT:    [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]], !fpmath [[META0]]
+; CHECK-NEXT:    [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]]
 ; CHECK-NEXT:    store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -2114,7 +2114,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo
 ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
 ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
 ; IEEE-GOODFREXP-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
-; IEEE-GOODFREXP-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
+; IEEE-GOODFREXP-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
 ; IEEE-GOODFREXP-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-GOODFREXP-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
 ; IEEE-GOODFREXP-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
@@ -2185,7 +2185,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo
 ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
 ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
 ; IEEE-BADFREXP-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
-; IEEE-BADFREXP-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
+; IEEE-BADFREXP-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
 ; IEEE-BADFREXP-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
 ; IEEE-BADFREXP-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]]
 ; IEEE-BADFREXP-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
@@ -2256,7 +2256,7 @@ define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x flo
 ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath(
 ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] {
 ; DAZ-NEXT:    [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
-; DAZ-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT_X_NO_MD]]
+; DAZ-NEXT:    [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]]
 ; DAZ-NEXT:    store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4
 ; DAZ-NEXT:    [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]]
 ; DAZ-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
index b8585120afa45f..e13f0fba2bdb7e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
@@ -2144,7 +2144,7 @@ define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
 ; VI-LABEL: @bitreverse_3xi15(
 ; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i15> [[A:%.*]] to <3 x i32>
 ; VI-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
-; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 17, i32 17, i32 17>
+; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 17)
 ; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i15>
 ; VI-NEXT:    store volatile <3 x i15> [[TMP4]], ptr addrspace(1) undef, align 8
 ; VI-NEXT:    ret void
@@ -2842,7 +2842,7 @@ define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
 ; VI-LABEL: @bitreverse_3xi16(
 ; VI-NEXT:    [[TMP1:%.*]] = zext <3 x i16> [[A:%.*]] to <3 x i32>
 ; VI-NEXT:    [[TMP2:%.*]] = call <3 x i32> @llvm.bitreverse.v3i32(<3 x i32> [[TMP1]])
-; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], <i32 16, i32 16, i32 16>
+; VI-NEXT:    [[TMP3:%.*]] = lshr <3 x i32> [[TMP2]], splat (i32 16)
 ; VI-NEXT:    [[TMP4:%.*]] = trunc <3 x i32> [[TMP3]] to <3 x i16>
 ; VI-NEXT:    store volatile <3 x i16> [[TMP4]], ptr addrspace(1) undef, align 8
 ; VI-NEXT:    ret void
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
index d938c16bf6134c..e1d0dfd543ee4e 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -69,13 +69,13 @@ define <2 x i8> @mul_v1i16(<1 x i16> %arg) {
 ;
 ; VI-LABEL: @mul_v1i16(
 ; VI-NEXT:  BB:
-; VI-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], <i16 42>
+; VI-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42)
 ; VI-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
 ; VI-NEXT:    ret <2 x i8> [[CAST]]
 ;
 ; DISABLED-LABEL: @mul_v1i16(
 ; DISABLED-NEXT:  BB:
-; DISABLED-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], <i16 42>
+; DISABLED-NEXT:    [[MUL:%.*]] = mul <1 x i16> [[ARG:%.*]], splat (i16 42)
 ; DISABLED-NEXT:    [[CAST:%.*]] = bitcast <1 x i16> [[MUL]] to <2 x i8>
 ; DISABLED-NEXT:    ret <2 x i8> [[CAST]]
 ;
@@ -97,12 +97,12 @@ define <1 x i8> @mul_v1i8(<1 x i8> %arg) {
 ;
 ; VI-LABEL: @mul_v1i8(
 ; VI-NEXT:  BB:
-; VI-NEXT:    [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], <i8 42>
+; VI-NEXT:    [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42)
 ; VI-NEXT:    ret <1 x i8> [[MUL]]
 ;
 ; DISABLED-LABEL: @mul_v1i8(
 ; DISABLED-NEXT:  BB:
-; DISABLED-NEXT:    [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], <i8 42>
+; DISABLED-NEXT:    [[MUL:%.*]] = mul <1 x i8> [[ARG:%.*]], splat (i8 42)
 ; DISABLED-NEXT:    ret <1 x i8> [[MUL]]
 ;
 BB:
@@ -112,10 +112,10 @@ BB:
 
 define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-LABEL: @smul24_v2i32(
-; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
-; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
-; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
-; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
+; SI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8)
+; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8)
+; SI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8)
+; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8)
 ; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
 ; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
 ; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
@@ -127,10 +127,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-NEXT:    ret <2 x i32> [[MUL]]
 ;
 ; VI-LABEL: @smul24_v2i32(
-; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
-; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
-; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
-; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
+; VI-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8)
+; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8)
+; VI-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8)
+; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8)
 ; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
 ; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
 ; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
@@ -142,10 +142,10 @@ define <2 x i32> @smul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; VI-NEXT:    ret <2 x i32> [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_v2i32(
-; DISABLED-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], <i32 8, i32 8>
-; DISABLED-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], <i32 8, i32 8>
-; DISABLED-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], <i32 8, i32 8>
-; DISABLED-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], <i32 8, i32 8>
+; DISABLED-NEXT:    [[SHL_LHS:%.*]] = shl <2 x i32> [[LHS:%.*]], splat (i32 8)
+; DISABLED-NEXT:    [[LHS24:%.*]] = ashr <2 x i32> [[SHL_LHS]], splat (i32 8)
+; DISABLED-NEXT:    [[SHL_RHS:%.*]] = shl <2 x i32> [[RHS:%.*]], splat (i32 8)
+; DISABLED-NEXT:    [[RHS24:%.*]] = ashr <2 x i32> [[SHL_RHS]], splat (i32 8)
 ; DISABLED-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
 ; DISABLED-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -184,8 +184,8 @@ define i32 @umul24_i32(i32 %lhs, i32 %rhs) {
 
 define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-LABEL: @umul24_v2i32(
-; SI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
-; SI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
+; SI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215)
+; SI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215)
 ; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
 ; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
 ; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
@@ -197,8 +197,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; SI-NEXT:    ret <2 x i32> [[MUL]]
 ;
 ; VI-LABEL: @umul24_v2i32(
-; VI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
-; VI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
+; VI-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215)
+; VI-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215)
 ; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[LHS24]], i64 0
 ; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[LHS24]], i64 1
 ; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[RHS24]], i64 0
@@ -210,8 +210,8 @@ define <2 x i32> @umul24_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
 ; VI-NEXT:    ret <2 x i32> [[MUL]]
 ;
 ; DISABLED-LABEL: @umul24_v2i32(
-; DISABLED-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], <i32 16777215, i32 16777215>
-; DISABLED-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], <i32 16777215, i32 16777215>
+; DISABLED-NEXT:    [[LHS24:%.*]] = and <2 x i32> [[LHS:%.*]], splat (i32 16777215)
+; DISABLED-NEXT:    [[RHS24:%.*]] = and <2 x i32> [[RHS:%.*]], splat (i32 16777215)
 ; DISABLED-NEXT:    [[MUL:%.*]] = mul <2 x i32> [[LHS24]], [[RHS24]]
 ; DISABLED-NEXT:    ret <2 x i32> [[MUL]]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
index a5bea7cd738f1d..6840c75644b977 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll
@@ -100,7 +100,7 @@ define void @broken_phi() {
 ; GFX9-NEXT:  bb:
 ; GFX9-NEXT:    br label [[BB1:%.*]]
 ; GFX9:       bb1:
-; GFX9-NEXT:    [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
+; GFX9-NEXT:    [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
 ; GFX9-NEXT:    br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
 ; GFX9:       bb2:
 ; GFX9-NEXT:    br label [[BB3]]
@@ -118,7 +118,7 @@ define void @broken_phi() {
 ; GFX12-NEXT:  bb:
 ; GFX12-NEXT:    br label [[BB1:%.*]]
 ; GFX12:       bb1:
-; GFX12-NEXT:    [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
+; GFX12-NEXT:    [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
 ; GFX12-NEXT:    br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
 ; GFX12:       bb2:
 ; GFX12-NEXT:    br label [[BB3]]
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index acdab29e85b91a..b494ff8ba1f5dd 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -541,7 +541,7 @@ define float @test_pow_afn_f32_neg0.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
   ret <2 x float> %pow
@@ -550,7 +550,7 @@ define <2 x float> @test_pow_afn_v2f32_0.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
   ret <2 x float> %pow
@@ -559,7 +559,7 @@ define <2 x float> @test_pow_afn_v2f32_neg0.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float -0.0>)
   ret <2 x float> %pow
@@ -568,7 +568,7 @@ define <2 x float> @test_pow_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <3 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
   ret <3 x float> %pow
@@ -577,7 +577,7 @@ define <3 x float> @test_pow_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x floa
 define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <3 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
   ret <3 x float> %pow
@@ -684,7 +684,7 @@ define <2 x float> @test_pow_afn_v2f32_1.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg1.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg1.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[__POWRECIP]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
@@ -713,7 +713,7 @@ define <3 x float> @test_pow_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x floa
 define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_pow_afn_v3f32_neg1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
   %pow = tail call afn <3 x float> @_Z3powDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
@@ -753,7 +753,7 @@ define <2 x float> @test_pow_afn_v2f32_2.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg2.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg2.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
@@ -793,7 +793,7 @@ define float @test_pow_afn_f32_neg3.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
@@ -803,7 +803,7 @@ define <2 x float> @test_pow_afn_v2f32_3.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -3, i32 -3>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
@@ -843,7 +843,7 @@ define float @test_pow_afn_f32_neg3.99(float %x) {
 define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_3.99
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 0x400FEB8520000000, float 0x400FEB8520000000>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0x400FEB8520000000, float 0x400FEB8520000000>)
@@ -853,7 +853,7 @@ define <2 x float> @test_pow_afn_v2f32_3.99(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg3.99(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg3.99
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 0xC00FEB8520000000, float 0xC00FEB8520000000>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 0xC00FEB8520000000, float 0xC00FEB8520000000>)
@@ -893,7 +893,7 @@ define float @test_pow_afn_f32_neg8.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 8, i32 8>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 8))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float 8.0>)
@@ -903,7 +903,7 @@ define <2 x float> @test_pow_afn_v2f32_8.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -8, i32 -8>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -8))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -8.0, float -8.0>)
@@ -943,7 +943,7 @@ define float @test_pow_afn_f32_neg12.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 12, i32 12>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 12))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float 12.0>)
@@ -953,7 +953,7 @@ define <2 x float> @test_pow_afn_v2f32_12.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -12, i32 -12>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -12))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -12.0, float -12.0>)
@@ -993,7 +993,7 @@ define float @test_pow_afn_f32_neg13.0(float %x) {
 define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 13, i32 13>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 13))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 13.0>)
@@ -1003,7 +1003,7 @@ define <2 x float> @test_pow_afn_v2f32_13.0(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_neg13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_neg13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -13, i32 -13>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call afn <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -13))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -13.0>)
@@ -1432,7 +1432,7 @@ define float @test_pow_f32__y_n_2_5(float %x) {
 define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> zeroinitializer)
   ret <2 x float> %pow
@@ -1441,7 +1441,7 @@ define <2 x float> @test_pow_v2f32__y_0(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
   ret <2 x float> %pow
@@ -1459,7 +1459,7 @@ define <2 x float> @test_pow_v2f32__y_1(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n1
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[__POWRECIP]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
@@ -1479,7 +1479,7 @@ define <2 x float> @test_pow_v2f32__y_2(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n2(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n2
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
@@ -1509,7 +1509,7 @@ define <2 x float> @test_pow_v2f32__y_neg_half(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_3
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
@@ -1519,7 +1519,7 @@ define <2 x float> @test_pow_v2f32__y_3(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n3
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -3, i32 -3>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -3.0,float -3.0>)
@@ -1529,7 +1529,7 @@ define <2 x float> @test_pow_v2f32__y_n3(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_2_5
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 2.500000e+00, float 2.500000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 2.500000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 2.5, float 2.5>)
@@ -1539,7 +1539,7 @@ define <2 x float> @test_pow_v2f32__y_2_5(<2 x float> %x) {
 define <2 x float> @test_pow_v2f32__y_n_2_5(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_v2f32__y_n_2_5
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float -2.500000e+00, float -2.500000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.500000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float -2.5, float -2.5>)
@@ -1814,7 +1814,7 @@ define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4(<2 x float> %x) {
 define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_afn_v2f32_nnan_ninf__y_4_5
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> <float 4.500000e+00, float 4.500000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x float> @_Z3powDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 4.500000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POW]]
 ;
   %pow = tail call afn nnan ninf <2 x float> @_Z3powDv2_fS_(<2 x float> %x, <2 x float> <float 4.5, float 4.5>)
@@ -2003,7 +2003,7 @@ define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4(<2 x double> %x) {
 define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @test_pow_afn_v2f64_nnan_ninf__y_4_5
 ; CHECK-SAME: (<2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> <double 4.500000e+00, double 4.500000e+00>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x double> @_Z3powDv2_dS_(<2 x double> [[X]], <2 x double> splat (double 4.500000e+00))
 ; CHECK-NEXT:    ret <2 x double> [[POW]]
 ;
   %pow = tail call afn nnan ninf <2 x double> @_Z3powDv2_dS_(<2 x double> %x, <2 x double> <double 4.5, double 4.5>)
@@ -2157,7 +2157,7 @@ define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4(<2 x half> %x) {
 define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_pow_afn_v2f16_nnan_ninf__y_4_5
 ; CHECK-SAME: (<2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> <half 0xH4480, half 0xH4480>)
+; CHECK-NEXT:    [[POW:%.*]] = tail call nnan ninf afn <2 x half> @_Z3powDv2_DhS_(<2 x half> [[X]], <2 x half> splat (half 0xH4480))
 ; CHECK-NEXT:    ret <2 x half> [[POW]]
 ;
   %pow = tail call afn nnan ninf <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 4.5, half 4.5>)
@@ -2395,7 +2395,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_sitofp(<2 x floa
 ; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
@@ -2444,7 +2444,7 @@ define <2 x float> @test_pow_afn_nnan_ninf_v2f32_known_integral_uitofp(<2 x floa
 ; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
index bd4b86f0387666..f9c359bc114ed3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pown.ll
@@ -277,7 +277,7 @@ define <2 x float> @test_pown_v2f32__y_0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> zeroinitializer)
@@ -288,7 +288,7 @@ define <2 x float> @test_pown_v2f32__y_0_undef(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_0_undef
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <2 x float> @_Z4pownDv2_fDv2_i(<2 x float> %x, <2 x i32> <i32 0, i32 poison>)
@@ -299,7 +299,7 @@ define <3 x float> @test_pown_v3f32__y_0(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_0
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <3 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <3 x float> @_Z4pownDv3_fDv3_i(<3 x float> %x, <3 x i32> zeroinitializer)
@@ -310,7 +310,7 @@ define <4 x float> @test_pown_v4f32__y_0(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_0
 ; CHECK-SAME: (<4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <4 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <4 x float> @_Z4pownDv4_fDv4_i(<4 x float> %x, <4 x i32> zeroinitializer)
@@ -321,7 +321,7 @@ define <8 x float> @test_pown_v8f32__y_0(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_0
 ; CHECK-SAME: (<8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <8 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <8 x float> @_Z4pownDv8_fDv8_i(<8 x float> %x, <8 x i32> zeroinitializer)
@@ -332,7 +332,7 @@ define <16 x float> @test_pown_v16f32__y_0(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_0
 ; CHECK-SAME: (<16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <16 x float> splat (float 1.000000e+00)
 ;
 entry:
   %call = tail call <16 x float> @_Z4pownDv16_fDv16_i(<16 x float> %x, <16 x i32> zeroinitializer)
@@ -527,7 +527,7 @@ define <2 x float> @test_pown_v2f32__y_neg1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pown_v2f32__y_neg1
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[__POWRECIP]]
 ;
 entry:
@@ -539,7 +539,7 @@ define <3 x float> @test_pown_v3f32__y_neg1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
 entry:
@@ -551,7 +551,7 @@ define <3 x float> @test_pown_v3f32__y_neg1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_pown_v3f32__y_neg1_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
 entry:
@@ -563,7 +563,7 @@ define <4 x float> @test_pown_v4f32__y_neg1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_pown_v4f32__y_neg1
 ; CHECK-SAME: (<4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <4 x float> [[__POWRECIP]]
 ;
 entry:
@@ -575,7 +575,7 @@ define <8 x float> @test_pown_v8f32__y_neg1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_pown_v8f32__y_neg1
 ; CHECK-SAME: (<8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <8 x float> [[__POWRECIP]]
 ;
 entry:
@@ -587,7 +587,7 @@ define <16 x float> @test_pown_v16f32__y_neg1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_pown_v16f32__y_neg1
 ; CHECK-SAME: (<16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <16 x float> [[__POWRECIP]]
 ;
 entry:
@@ -698,7 +698,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32(<2 x float> %x, <2 x i32> %y)
 ; CHECK-NEXT:    [[POWNI2F:%.*]] = sitofp <2 x i32> [[Y]] to <2 x float>
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x float> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x float> @llvm.exp2.v2f32(<2 x float> [[__YLOGX]])
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], <i32 31, i32 31>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i32> [[Y]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i32> [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[__EXP2]] to <2 x i32>
@@ -744,7 +744,7 @@ define <2 x double> @test_pown_afn_nnan_ninf_v2f64(<2 x double> %x, <2 x i32> %y
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x double> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x double> @_Z4exp2Dv2_d(<2 x double> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = zext <2 x i32> [[Y]] to <2 x i64>
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], <i64 63, i64 63>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i64> [[__YTOU]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x double> [[X]] to <2 x i64>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i64> [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[__EXP2]] to <2 x i64>
@@ -790,7 +790,7 @@ define <2 x half> @test_pown_afn_nnan_ninf_v2f16(<2 x half> %x, <2 x i32> %y) {
 ; CHECK-NEXT:    [[__YLOGX:%.*]] = fmul nnan ninf afn <2 x half> [[__LOG2]], [[POWNI2F]]
 ; CHECK-NEXT:    [[__EXP2:%.*]] = call nnan ninf afn <2 x half> @llvm.exp2.v2f16(<2 x half> [[__YLOGX]])
 ; CHECK-NEXT:    [[__YTOU:%.*]] = trunc <2 x i32> [[Y]] to <2 x i16>
-; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], <i16 15, i16 15>
+; CHECK-NEXT:    [[__YEVEN:%.*]] = shl <2 x i16> [[__YTOU]], splat (i16 15)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x half> [[X]] to <2 x i16>
 ; CHECK-NEXT:    [[__POW_SIGN:%.*]] = and <2 x i16> [[__YEVEN]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x half> [[__EXP2]] to <2 x i16>
@@ -1001,7 +1001,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]]
 ; CHECK-NEXT:    [[__POWPROD:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[__POWX2]]
-; CHECK-NEXT:    [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[__POWPROD]]
+; CHECK-NEXT:    [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWPROD]]
 ; CHECK-NEXT:    ret <2 x float> [[__1POWPROD]]
 ;
 entry:
@@ -1015,7 +1015,7 @@ define <2 x float> @test_pown_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[__POWX2:%.*]] = fmul nnan ninf afn <2 x float> [[X]], [[X]]
 ; CHECK-NEXT:    [[__POWX21:%.*]] = fmul nnan ninf afn <2 x float> [[__POWX2]], [[__POWX2]]
-; CHECK-NEXT:    [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[__POWX21]]
+; CHECK-NEXT:    [[__1POWPROD:%.*]] = fdiv nnan ninf afn <2 x float> splat (float 1.000000e+00), [[__POWX21]]
 ; CHECK-NEXT:    ret <2 x float> [[__1POWPROD]]
 ;
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
index 1a92ca8960a776..c2b2c693e742ae 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-powr.ll
@@ -517,7 +517,7 @@ define float @test_powr_afn_f32_neg0.0(float %x) {
 define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
   ret <2 x float> %powr
@@ -526,7 +526,7 @@ define <2 x float> @test_powr_afn_v2f32_0.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg0.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg0.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -0.0, float -0.0>)
   ret <2 x float> %powr
@@ -545,7 +545,7 @@ define <2 x float> @test_powr_afn_v2f32_plus_minus_0.0(<2 x float> %x) {
 define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <3 x float> splat (float 1.000000e+00)
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float 0.0, float poison, float 0.0>)
   ret <3 x float> %powr
@@ -554,7 +554,7 @@ define <3 x float> @test_powr_afn_v3f32_0.0_splat_undef(<3 x float> %x, <3 x flo
 define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg0.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    ret <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <3 x float> splat (float 1.000000e+00)
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -0.0, float poison, float -0.0>)
   ret <3 x float> %powr
@@ -661,7 +661,7 @@ define <2 x float> @test_powr_afn_v2f32_1.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg1.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg1.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[__POWRECIP]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
@@ -690,7 +690,7 @@ define <3 x float> @test_powr_afn_v3f32_1.0_splat_undef(<3 x float> %x, <3 x flo
 define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef(<3 x float> %x, <3 x float> %y) {
 ; CHECK-LABEL: define <3 x float> @test_powr_afn_v3f32_neg1.0_splat_undef
 ; CHECK-SAME: (<3 x float> [[X:%.*]], <3 x float> [[Y:%.*]]) {
-; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__POWRECIP:%.*]] = fdiv afn <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__POWRECIP]]
 ;
   %powr = tail call afn <3 x float> @_Z4powrDv3_fS_(<3 x float> %x, <3 x float> <float -1.0, float poison, float -1.0>)
@@ -730,7 +730,7 @@ define <2 x float> @test_powr_afn_v2f32_2.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg2.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg2.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float -2.000000e+00, float -2.000000e+00>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -2.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -2.0, float -2.0>)
@@ -770,7 +770,7 @@ define float @test_powr_afn_f32_neg3.0(float %x) {
 define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 3.000000e+00, float 3.000000e+00>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 3.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 3.0, float 3.0>)
@@ -780,7 +780,7 @@ define <2 x float> @test_powr_afn_v2f32_3.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg3.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float -3.000000e+00, float -3.000000e+00>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -3.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -3.0, float -3.0>)
@@ -820,7 +820,7 @@ define float @test_powr_afn_f32_neg3.99(float %x) {
 define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_3.99
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 0x400FEB8520000000, float 0x400FEB8520000000>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0x400FEB8520000000))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 0x400FEB8520000000, float 0x400FEB8520000000>)
@@ -830,7 +830,7 @@ define <2 x float> @test_powr_afn_v2f32_3.99(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg3.99(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg3.99
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 0xC00FEB8520000000, float 0xC00FEB8520000000>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 0xC00FEB8520000000))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 0xC00FEB8520000000, float 0xC00FEB8520000000>)
@@ -870,7 +870,7 @@ define float @test_powr_afn_f32_neg8.0(float %x) {
 define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 8.000000e+00, float 8.000000e+00>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 8.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 8.0, float 8.0>)
@@ -880,7 +880,7 @@ define <2 x float> @test_powr_afn_v2f32_8.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg8.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg8.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float -8.000000e+00, float -8.000000e+00>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -8.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -8.0, float -8.0>)
@@ -920,7 +920,7 @@ define float @test_powr_afn_f32_neg12.0(float %x) {
 define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.200000e+01, float 1.200000e+01>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.200000e+01))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 12.0, float 12.0>)
@@ -930,7 +930,7 @@ define <2 x float> @test_powr_afn_v2f32_12.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg12.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg12.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float -1.200000e+01, float -1.200000e+01>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.200000e+01))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -12.0, float -12.0>)
@@ -970,7 +970,7 @@ define float @test_powr_afn_f32_neg13.0(float %x) {
 define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float 1.300000e+01, float 1.300000e+01>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float 1.300000e+01))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float 13.0, float 13.0>)
@@ -980,7 +980,7 @@ define <2 x float> @test_powr_afn_v2f32_13.0(<2 x float> %x) {
 define <2 x float> @test_powr_afn_v2f32_neg13.0(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_powr_afn_v2f32_neg13.0
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> <float -1.300000e+01, float -1.300000e+01>)
+; CHECK-NEXT:    [[POWR:%.*]] = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> [[X]], <2 x float> splat (float -1.300000e+01))
 ; CHECK-NEXT:    ret <2 x float> [[POWR]]
 ;
   %powr = tail call afn <2 x float> @_Z4powrDv2_fS_(<2 x float> %x, <2 x float> <float -13.0, float -13.0>)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
index 7932f8d1fc5beb..a2d5ce2d658b57 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-rootn.ll
@@ -362,7 +362,7 @@ define <2 x half> @test_rootn_v2f16_2(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg1(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg1(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x half> splat (half 0xH3C00), [[X]]
 ; CHECK-NEXT:    ret <2 x half> [[__ROOTN2DIV]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -1, i32 -1>)
@@ -373,7 +373,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg2(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call contract <2 x half> @llvm.sqrt.v2f16(<2 x half> [[X]])
-; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> <half 0xH3C00, half 0xH3C00>, [[TMP1]], !fpmath [[META0]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[TMP1]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x half> [[__ROOTN2RSQRT]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -2, i32 -2>)
@@ -383,7 +383,7 @@ define <2 x half> @test_rootn_v2f16_neg2(<2 x half> %x) {
 define <2 x half> @test_rootn_v2f16_neg3(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_rootn_v2f16_neg3(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> <i32 -3, i32 -3>)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> [[X]], <2 x i32> splat (i32 -3))
 ; CHECK-NEXT:    ret <2 x half> [[CALL]]
 ;
   %call = tail call <2 x half> @_Z5rootnDv2_DhDv2_i(<2 x half> %x, <2 x i32> <i32 -3, i32 -3>)
@@ -536,7 +536,7 @@ define <2 x float> @test_rootn_v2f32__y_1__strictfp(<2 x float> %x) #1 {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_1__strictfp(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 1, i32 1>) #[[ATTR0]]
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 1)) #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
@@ -795,7 +795,7 @@ define <2 x float> @test_rootn_v2f32__y_neg1(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg1(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -807,7 +807,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -819,7 +819,7 @@ define <3 x float> @test_rootn_v3f32__y_neg1_undef(<3 x float> %x) {
 ; CHECK-LABEL: define <3 x float> @test_rootn_v3f32__y_neg1_undef(
 ; CHECK-SAME: <3 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <3 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <3 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -831,7 +831,7 @@ define <4 x float> @test_rootn_v4f32__y_neg1(<4 x float> %x) {
 ; CHECK-LABEL: define <4 x float> @test_rootn_v4f32__y_neg1(
 ; CHECK-SAME: <4 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <4 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <4 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -843,7 +843,7 @@ define <8 x float> @test_rootn_v8f32__y_neg1(<8 x float> %x) {
 ; CHECK-LABEL: define <8 x float> @test_rootn_v8f32__y_neg1(
 ; CHECK-SAME: <8 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <8 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <8 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -855,7 +855,7 @@ define <16 x float> @test_rootn_v16f32__y_neg1(<16 x float> %x) {
 ; CHECK-LABEL: define <16 x float> @test_rootn_v16f32__y_neg1(
 ; CHECK-SAME: <16 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <16 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[__ROOTN2DIV:%.*]] = fdiv <16 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <16 x float> [[__ROOTN2DIV]]
 ;
 entry:
@@ -930,7 +930,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2(<2 x float> %x) {
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
-; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[TMP0]], !fpmath [[META0]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
@@ -943,7 +943,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__flags(<2 x float> %x) {
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]])
-; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[TMP0]], !fpmath [[META0]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = fdiv nnan nsz contract <2 x float> splat (float 1.000000e+00), [[TMP0]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
@@ -955,7 +955,7 @@ define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(<2 x float> %x) #1 {
 ; CHECK-LABEL: define <2 x float> @test_rootn_v2f32__y_neg2__strictfp(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -2, i32 -2>) #[[ATTR0]]
+; CHECK-NEXT:    [[__ROOTN2RSQRT:%.*]] = tail call <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -2)) #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[__ROOTN2RSQRT]]
 ;
 entry:
@@ -1277,7 +1277,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_4(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 4, i32 4>)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 4))
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
@@ -1289,7 +1289,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg3(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -3, i32 -3>)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -3))
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
@@ -1301,7 +1301,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_neg4(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 -4, i32 -4>)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 -4))
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
@@ -1313,7 +1313,7 @@ define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_rootn_afn_nnan_ninf_v2f32__y_5(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> <i32 5, i32 5>)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf afn <2 x float> @_Z5rootnDv2_fDv2_i(<2 x float> [[X]], <2 x i32> splat (i32 5))
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index cc98b5333c5bb8..f00e5348e62251 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -27,9 +27,9 @@ define <2 x ptr addrspace(7)> @gep_vectors(<2 x ptr addrspace(7)> %in, <2 x i32>
 ; CHECK-SAME: ({ <2 x ptr addrspace(8)>, <2 x i32> } [[IN:%.*]], <2 x i32> [[IDX:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[IN_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 0
 ; CHECK-NEXT:    [[IN_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 1
-; CHECK-NEXT:    [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], <i32 40, i32 40>
-; CHECK-NEXT:    [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], <i32 8, i32 8>
-; CHECK-NEXT:    [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], <i32 24, i32 24>
+; CHECK-NEXT:    [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], splat (i32 40)
+; CHECK-NEXT:    [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8)
+; CHECK-NEXT:    [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24)
 ; CHECK-NEXT:    [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
@@ -47,9 +47,9 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[IDX]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTSPLAT_C:%.*]] = trunc <2 x i64> [[DOTSPLAT]] to <2 x i32>
-; CHECK-NEXT:    [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], <i32 40, i32 40>
-; CHECK-NEXT:    [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], <i32 8, i32 8>
-; CHECK-NEXT:    [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], <i32 24, i32 24>
+; CHECK-NEXT:    [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], splat (i32 40)
+; CHECK-NEXT:    [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8)
+; CHECK-NEXT:    [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24)
 ; CHECK-NEXT:    [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index bd9234d864b3d8..05c727201bbf1d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -63,7 +63,7 @@ define amdgpu_vs void @promote_store_aggr() #0 {
 ; CHECK-NEXT:    [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
 ; CHECK-NEXT:    [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
 ; CHECK-NEXT:    store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
-; CHECK-NEXT:    store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv, align 16
+; CHECK-NEXT:    store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16
 ; CHECK-NEXT:    ret void
 ;
   %i = alloca i32, addrspace(5)
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
index f1e2737b370ef0..74651e10c7809f 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
@@ -23,7 +23,7 @@ entry:
 define amdgpu_kernel void @memset_all_5(i64 %val) {
 ; CHECK-LABEL: @memset_all_5(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> <i64 361700864190383365, i64 361700864190383365, i64 361700864190383365, i64 361700864190383365>, i64 [[VAL:%.*]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> splat (i64 361700864190383365), i64 [[VAL:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[VAL]], i32 1
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
index 1e60261df13046..29996d68040e73 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-live-reg-opt.ll
@@ -357,7 +357,7 @@ define void @broken_phi() {
 ; GFX906-NEXT:  bb:
 ; GFX906-NEXT:    br label [[BB1:%.*]]
 ; GFX906:       bb1:
-; GFX906-NEXT:    [[I:%.*]] = phi <4 x i8> [ <i8 1, i8 1, i8 1, i8 1>, [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
+; GFX906-NEXT:    [[I:%.*]] = phi <4 x i8> [ splat (i8 1), [[BB:%.*]] ], [ [[I8:%.*]], [[BB7:%.*]] ]
 ; GFX906-NEXT:    br i1 false, label [[BB3:%.*]], label [[BB2:%.*]]
 ; GFX906:       bb2:
 ; GFX906-NEXT:    br label [[BB3]]
@@ -432,7 +432,7 @@ define amdgpu_kernel void @deletedPHI(i32 %in0, i1 %cmp, <10 x i8> %invec0) {
 ; GFX906-NEXT:    br label [[BB_1:%.*]]
 ; GFX906:       bb.1:
 ; GFX906-NEXT:    [[PHI0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB_11:%.*]] ]
-; GFX906-NEXT:    [[PHI1:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ]
+; GFX906-NEXT:    [[PHI1:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY]] ], [ [[VEC1:%.*]], [[BB_11]] ]
 ; GFX906-NEXT:    br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
 ; GFX906:       bb.2:
 ; GFX906-NEXT:    br label [[BB_3]]
@@ -514,7 +514,7 @@ define amdgpu_kernel void @multiple_unwind(i1 %cmp, <10 x i8> %invec) {
 ; GFX906-NEXT:  entry:
 ; GFX906-NEXT:    br label [[BB_1:%.*]]
 ; GFX906:       bb.1:
-; GFX906-NEXT:    [[PHI0:%.*]] = phi <10 x i8> [ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ]
+; GFX906-NEXT:    [[PHI0:%.*]] = phi <10 x i8> [ splat (i8 1), [[ENTRY:%.*]] ], [ [[PHI3:%.*]], [[BB_8:%.*]] ]
 ; GFX906-NEXT:    br i1 [[CMP]], label [[BB_3:%.*]], label [[BB_2:%.*]]
 ; GFX906:       bb.2:
 ; GFX906-NEXT:    br label [[BB_3]]
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
index 73279844e91b33..6eece708fa337d 100644
--- a/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
+++ b/llvm/test/CodeGen/Hexagon/autohvx/vector-align-tbaa.ll
@@ -135,18 +135,18 @@ define void @f3(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
 ; CHECK-NEXT:    [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8>
-; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8>
 ; CHECK-NEXT:    [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8>
-; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8>
 ; CHECK-NEXT:    [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8>
-; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8>
 ; CHECK-NEXT:    [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1>
 ; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5:![0-9]+]]
@@ -188,18 +188,18 @@ define void @f4(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
 ; CHECK-NEXT:    [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8>
-; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8>
 ; CHECK-NEXT:    [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8>
-; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8>
 ; CHECK-NEXT:    [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8>
-; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8>
 ; CHECK-NEXT:    [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1>
 ; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]])
@@ -241,18 +241,18 @@ define void @f5(ptr %a0, i32 %a1, <64 x i16> %a2, <64 x i16> %a3) #0 {
 ; CHECK-NEXT:    [[CST3:%.*]] = bitcast <64 x i16> [[A2:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST3]], <32 x i32> undef, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST4:%.*]] = bitcast <32 x i32> [[CUP]] to <128 x i8>
-; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> zeroinitializer, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP5:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> zeroinitializer, i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST6:%.*]] = bitcast <32 x i32> [[CUP5]] to <128 x i8>
 ; CHECK-NEXT:    [[CST7:%.*]] = bitcast <64 x i16> [[A3:%.*]] to <32 x i32>
 ; CHECK-NEXT:    [[CST8:%.*]] = bitcast <64 x i16> [[A2]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP9:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> [[CST7]], <32 x i32> [[CST8]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST10:%.*]] = bitcast <32 x i32> [[CUP9]] to <128 x i8>
-; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP11:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> splat (i32 -1), <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST12:%.*]] = bitcast <32 x i32> [[CUP11]] to <128 x i8>
 ; CHECK-NEXT:    [[CST13:%.*]] = bitcast <64 x i16> [[A3]] to <32 x i32>
 ; CHECK-NEXT:    [[CUP14:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> undef, <32 x i32> [[CST13]], i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST15:%.*]] = bitcast <32 x i32> [[CUP14]] to <128 x i8>
-; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, i32 [[PTI1]])
+; CHECK-NEXT:    [[CUP16:%.*]] = call <32 x i32> @llvm.hexagon.V6.vlalignb.128B(<32 x i32> zeroinitializer, <32 x i32> splat (i32 -1), i32 [[PTI1]])
 ; CHECK-NEXT:    [[CST17:%.*]] = bitcast <32 x i32> [[CUP16]] to <128 x i8>
 ; CHECK-NEXT:    [[TRN:%.*]] = trunc <128 x i8> [[CST6]] to <128 x i1>
 ; CHECK-NEXT:    call void @llvm.masked.store.v128i8.p0(<128 x i8> [[CST4]], ptr [[ITP]], i32 128, <128 x i1> [[TRN]]), !tbaa [[TBAA5]]
diff --git a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
index 5c8eb265843645..5502980a263b2e 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
@@ -276,7 +276,7 @@ define dso_local i32 @baz() {
 ; CHECK-NEXT:    [[VARARG_BUFFER:%.*]] = alloca [[BAZ_VARARG:%.*]], align 16
 ; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr [[VARARG_BUFFER]])
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw [[BAZ_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP0]], align 16
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP0]], align 16
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 @variadics3(i32 noundef 1, ptr [[VARARG_BUFFER]])
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr [[VARARG_BUFFER]])
 ; CHECK-NEXT:    ret i32 [[CALL]]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll
index 32aec9586c29cb..7cae78f40e0d8d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll
@@ -12,8 +12,8 @@ define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
+; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
 ; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
@@ -21,7 +21,7 @@ define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture reado
 ; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       for.cond.cleanup:
@@ -58,7 +58,7 @@ define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
@@ -66,7 +66,7 @@ define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       for.cond.cleanup:
@@ -102,8 +102,8 @@ define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocaptur
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 32>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
-; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
+; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
 ; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
@@ -111,7 +111,7 @@ define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocaptur
 ; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       for.cond.cleanup:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
index d723c2f6df1afd..ebd7918851f73f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store.ll
@@ -382,7 +382,7 @@ define void @negative_shl_non_commute(ptr noalias nocapture %A, ptr noalias noca
 ; CHECK-NEXT:    [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    store <8 x i32> [[I4]], ptr [[I2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       for.cond.cleanup:
@@ -677,9 +677,9 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
 ; ZVE32F:       bb2:
 ; ZVE32F-NEXT:    [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ]
 ; ZVE32F-NEXT:    [[I3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[BB]] ], [ [[I16:%.*]], [[BB2]] ]
-; ZVE32F-NEXT:    [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], <i64 5, i64 5>
-; ZVE32F-NEXT:    [[I5:%.*]] = mul <2 x i64> [[I3]], <i64 5, i64 5>
-; ZVE32F-NEXT:    [[I6:%.*]] = add <2 x i64> [[I5]], <i64 10, i64 10>
+; ZVE32F-NEXT:    [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5)
+; ZVE32F-NEXT:    [[I5:%.*]] = mul <2 x i64> [[I3]], splat (i64 5)
+; ZVE32F-NEXT:    [[I6:%.*]] = add <2 x i64> [[I5]], splat (i64 10)
 ; ZVE32F-NEXT:    [[I7:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], <2 x i64> [[I4]]
 ; ZVE32F-NEXT:    [[I8:%.*]] = getelementptr inbounds ptr, ptr [[ARG1]], <2 x i64> [[I6]]
 ; ZVE32F-NEXT:    [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x ptr> undef)
@@ -689,7 +689,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur
 ; ZVE32F-NEXT:    [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2
 ; ZVE32F-NEXT:    store <2 x ptr> [[I10]], ptr [[I13]], align 8
 ; ZVE32F-NEXT:    [[I15]] = add nuw i64 [[I]], 4
-; ZVE32F-NEXT:    [[I16]] = add <2 x i64> [[I3]], <i64 4, i64 4>
+; ZVE32F-NEXT:    [[I16]] = add <2 x i64> [[I3]], splat (i64 4)
 ; ZVE32F-NEXT:    [[I17:%.*]] = icmp eq i64 [[I15]], 1024
 ; ZVE32F-NEXT:    br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
 ; ZVE32F:       bb18:
@@ -758,15 +758,15 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu
 ; ZVE32F-NEXT:    [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8
 ; ZVE32F-NEXT:    [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2
 ; ZVE32F-NEXT:    [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8
-; ZVE32F-NEXT:    [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], <i64 5, i64 5>
-; ZVE32F-NEXT:    [[I11:%.*]] = mul <2 x i64> [[I3]], <i64 5, i64 5>
-; ZVE32F-NEXT:    [[I12:%.*]] = add <2 x i64> [[I11]], <i64 10, i64 10>
+; ZVE32F-NEXT:    [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5)
+; ZVE32F-NEXT:    [[I11:%.*]] = mul <2 x i64> [[I3]], splat (i64 5)
+; ZVE32F-NEXT:    [[I12:%.*]] = add <2 x i64> [[I11]], splat (i64 10)
 ; ZVE32F-NEXT:    [[I13:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], <2 x i64> [[I10]]
 ; ZVE32F-NEXT:    [[I14:%.*]] = getelementptr inbounds ptr, ptr [[ARG]], <2 x i64> [[I12]]
 ; ZVE32F-NEXT:    call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> <i1 true, i1 true>)
 ; ZVE32F-NEXT:    call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> <i1 true, i1 true>)
 ; ZVE32F-NEXT:    [[I15]] = add nuw i64 [[I]], 4
-; ZVE32F-NEXT:    [[I16]] = add <2 x i64> [[I3]], <i64 4, i64 4>
+; ZVE32F-NEXT:    [[I16]] = add <2 x i64> [[I3]], splat (i64 4)
 ; ZVE32F-NEXT:    [[I17:%.*]] = icmp eq i64 [[I15]], 1024
 ; ZVE32F-NEXT:    br i1 [[I17]], label [[BB18:%.*]], label [[BB2]]
 ; ZVE32F:       bb18:
@@ -995,7 +995,7 @@ define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture r
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], splat (i16 5)
 ; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
 ; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
@@ -1003,7 +1003,7 @@ define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture r
 ; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], splat (i16 32)
 ; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       for.cond.cleanup:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
index 6342e288647c2f..e70868d4ffeb03 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
@@ -119,7 +119,7 @@ define dso_local void @foo3(ptr noalias nocapture %A, ptr noalias nocapture read
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], <i32 32002, i32 32002, i32 32002, i32 32002>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[INDUCTION]], splat (i32 32002)
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
@@ -226,13 +226,33 @@ for.cond.cleanup:
   ret void
 }
 
+;
+define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @inconsistent_tripcounts(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 -1)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
-define dso_local void @inconsistent_tripcounts(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   %start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
   br label %vector.body
@@ -261,13 +281,33 @@ for.cond.cleanup:
   ret void
 }
 
+;
+define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @overflow_in_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1073741824)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
-define dso_local void @overflow_in_sub(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   %start = call i32 @llvm.start.loop.iterations.i32(i32 1073741824)
   br label %vector.body
@@ -296,13 +336,33 @@ for.cond.cleanup:
 }
 
 
+;
+define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @IV_not_an_induction(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[N:%.*]], i32 32003)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
-define dso_local void @IV_not_an_induction(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   %start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
   br label %vector.body
@@ -331,13 +391,33 @@ for.cond.cleanup:
   ret void
 }
 
+;
+define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @IV_wrong_step(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 3
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
-define dso_local void @IV_wrong_step(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   %start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
   br label %vector.body
@@ -369,13 +449,33 @@ for.cond.cleanup:
   ret void
 }
 
+;
+define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @IV_step_not_constant(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 8001)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV14:%.*]] = phi ptr [ [[SCEVGEP15:%.*]], [[VECTOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV11:%.*]] = phi ptr [ [[SCEVGEP12:%.*]], [[VECTOR_BODY]] ], [ [[C:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[VECTOR_BODY]] ], [ [[B:%.*]], [[ENTRY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[ENTRY]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 32003)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD9:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV11]], i32 4, <4 x i1> [[TMP1]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[LSR_IV14]], i32 4, <4 x i1> [[TMP1]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], [[N:%.*]]
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 4
+; CHECK-NEXT:    [[SCEVGEP12]] = getelementptr i32, ptr [[LSR_IV11]], i32 4
+; CHECK-NEXT:    [[SCEVGEP15]] = getelementptr i32, ptr [[LSR_IV14]], i32 4
+; CHECK-NEXT:    [[TMP3]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
 ;
-define dso_local void @IV_step_not_constant(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, ptr noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 entry:
   %start = call i32 @llvm.start.loop.iterations.i32(i32 8001)
   br label %vector.body
@@ -406,13 +506,49 @@ for.cond.cleanup:
   ret void
 }
 
+;
+define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @outerloop_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP24:%.*]] = icmp eq i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP24]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_PH_PREHEADER:%.*]]
+; CHECK:       vector.ph.preheader:
+; CHECK-NEXT:    br label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[LSR_IV36:%.*]] = phi ptr [ [[B:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP37:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV31:%.*]] = phi ptr [ [[C:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP32:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi ptr [ [[A:%.*]], [[VECTOR_PH_PREHEADER]] ], [ [[SCEVGEP:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-NEXT:    [[J_025:%.*]] = phi i32 [ [[INC11:%.*]], [[FOR_COND_CLEANUP3]] ], [ 0, [[VECTOR_PH_PREHEADER]] ]
+; CHECK-NEXT:    [[START:%.*]] = call i32 @llvm.start.loop.iterations.i32(i32 1025)
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NOT:   @llvm.arm.mve.vctp32
-; CHECK:       @llvm.get.active.lane.mask
-; CHECK:       ret void
+; CHECK-NEXT:    [[LSR_IV38:%.*]] = phi ptr [ [[SCEVGEP39:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV36]], [[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV33:%.*]] = phi ptr [ [[SCEVGEP34:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV31]], [[VECTOR_PH]] ]
+; CHECK-NEXT:    [[LSR_IV28:%.*]] = phi ptr [ [[SCEVGEP29:%.*]], [[VECTOR_BODY]] ], [ [[LSR_IV]], [[VECTOR_PH]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ [[START]], [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[J_025]], i32 4096)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV38]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD27:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[LSR_IV33]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> undef)
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_MASKED_LOAD27]], [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP1]], ptr [[LSR_IV28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[SCEVGEP29]] = getelementptr i32, ptr [[LSR_IV28]], i32 4
+; CHECK-NEXT:    [[SCEVGEP34]] = getelementptr i32, ptr [[LSR_IV33]], i32 4
+; CHECK-NEXT:    [[SCEVGEP39]] = getelementptr i32, ptr [[LSR_IV38]], i32 4
+; CHECK-NEXT:    [[TMP2]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[TMP0]], i32 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[TMP3]], label [[VECTOR_BODY]], label [[FOR_COND_CLEANUP3]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.cond.cleanup3:
+; CHECK-NEXT:    [[INC11]] = add nuw i32 [[J_025]], 1
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i32, ptr [[LSR_IV]], i32 1
+; CHECK-NEXT:    [[SCEVGEP32]] = getelementptr i32, ptr [[LSR_IV31]], i32 1
+; CHECK-NEXT:    [[SCEVGEP37]] = getelementptr i32, ptr [[LSR_IV36]], i32 1
+; CHECK-NEXT:    [[EXITCOND26:%.*]] = icmp eq i32 [[INC11]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND26]], label [[FOR_COND_CLEANUP]], label [[VECTOR_PH]]
 ;
-define dso_local void @outerloop_phi(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) local_unnamed_addr #0 {
 entry:
   %cmp24 = icmp eq i32 %N, 0
   br i1 %cmp24, label %for.cond.cleanup, label %vector.ph.preheader
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
index b91800c48cec4f..239372e1addb23 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-optimisation-deep.ll
@@ -6,13 +6,13 @@ target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 define arm_aapcs_vfpcc void @push_out_add_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
 ; CHECK-LABEL: @push_out_add_sub_block(
 ; CHECK-NEXT:  vector.ph:
-; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, splat (i32 6)
+; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ]
@@ -65,13 +65,13 @@ end:
 define arm_aapcs_vfpcc void @push_out_add_sub_block_commutedphi(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
 ; CHECK-LABEL: @push_out_add_sub_block_commutedphi(
 ; CHECK-NEXT:  vector.ph:
-; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> <i32 0, i32 2, i32 4, i32 6>, splat (i32 6)
+; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 32)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ]
@@ -124,15 +124,15 @@ end:
 define arm_aapcs_vfpcc void @push_out_mul_sub_block(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n.vec) {
 ; CHECK-LABEL: @push_out_mul_sub_block(
 ; CHECK-NEXT:  vector.ph:
-; CHECK-NEXT:    [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> <i32 0, i32 2, i32 4, i32 6>, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[PRODUCT:%.*]] = mul <4 x i32> <i32 8, i32 8, i32 8, i32 8>, <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[PUSHEDOUTMUL:%.*]] = mul <4 x i32> <i32 0, i32 2, i32 4, i32 6>, splat (i32 3)
+; CHECK-NEXT:    [[PRODUCT:%.*]] = mul <4 x i32> splat (i32 8), splat (i32 3)
+; CHECK-NEXT:    [[PUSHEDOUTADD:%.*]] = add <4 x i32> [[PUSHEDOUTMUL]], splat (i32 6)
+; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[PUSHEDOUTADD]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[STARTINDEX:%.*]] = add <4 x i32> [[SCALEDINDEX]], [[DOTSPLAT]]
-; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], <i32 96, i32 96, i32 96, i32 96>
+; CHECK-NEXT:    [[PREINCREMENTSTARTINDEX:%.*]] = sub <4 x i32> [[STARTINDEX]], splat (i32 96)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY_END:%.*]] ]
@@ -195,8 +195,8 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl
 ; CHECK:       vector.2.ph:
 ; CHECK-NEXT:    br label [[VECTOR_2_BODY:%.*]]
 ; CHECK:       vector.2.body:
-; CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3)
+; CHECK-NEXT:    [[SCALEDINDEX:%.*]] = shl <4 x i32> [[TMP0]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[DATA:%.*]] to i32
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -211,7 +211,7 @@ define arm_aapcs_vfpcc void @push_out_mul_sub_loop(ptr noalias nocapture readonl
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[VECTOR_BODY_END]], label [[VECTOR_2_BODY]]
 ; CHECK:       vector.body.end:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[END:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       end:
@@ -261,13 +261,13 @@ define arm_aapcs_vfpcc void @invariant_add(ptr noalias nocapture readonly %data,
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[L0:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3)
 ; CHECK-NEXT:    [[L1:%.*]] = add <4 x i32> [[L0]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr [[DATA:%.*]], <4 x i32> [[L1]], i32 32, i32 2, i32 1)
 ; CHECK-NEXT:    [[L3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr [[L3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[L5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC:%.*]]
 ; CHECK-NEXT:    br i1 [[L5]], label [[END:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       end:
diff --git a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
index c18ae82aeb71fa..7846f82ad54cd0 100644
--- a/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
+++ b/llvm/test/CodeGen/X86/codegen-prepare-extload.ll
@@ -523,7 +523,7 @@ declare void @dummy(i64, i64, i64)
 define void @vectorPromotion() {
 ; OPTALL-LABEL: define void @vectorPromotion() {
 ; OPTALL-NEXT:  entry:
-; OPTALL-NEXT:    [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8>
+; OPTALL-NEXT:    [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8)
 ; OPTALL-NEXT:    [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64>
 ; OPTALL-NEXT:    ret void
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
index 4cc038f03ff2c3..8a1e3551b2741f 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
 ;
 ; RUN: opt < %s -passes=msan -S | FileCheck %s
 ;
@@ -14,7 +14,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:    unreachable
@@ -25,7 +25,7 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -58,7 +58,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -69,7 +69,7 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -102,7 +102,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -113,7 +113,7 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -146,7 +146,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -157,7 +157,7 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -189,7 +189,7 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -199,9 +199,9 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -217,7 +217,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -228,7 +228,7 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -258,7 +258,7 @@ define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -286,7 +286,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -297,7 +297,7 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -330,7 +330,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -341,7 +341,7 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -374,7 +374,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -385,7 +385,7 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -418,7 +418,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -429,7 +429,7 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -463,7 +463,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -474,7 +474,7 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -508,7 +508,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -519,7 +519,7 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -553,7 +553,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -564,7 +564,7 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -598,7 +598,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -609,7 +609,7 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -643,7 +643,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -654,7 +654,7 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -688,7 +688,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -699,7 +699,7 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -733,7 +733,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -744,7 +744,7 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -778,7 +778,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -789,7 +789,7 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -821,7 +821,7 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -831,9 +831,9 @@ define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -849,7 +849,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -860,7 +860,7 @@ define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -890,7 +890,7 @@ define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -941,7 +941,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -952,7 +952,7 @@ define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -985,7 +985,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -996,7 +996,7 @@ define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1029,7 +1029,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1040,7 +1040,7 @@ define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1073,7 +1073,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1084,7 +1084,7 @@ define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1116,7 +1116,7 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1126,9 +1126,9 @@ define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -1144,7 +1144,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1155,7 +1155,7 @@ define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1185,7 +1185,7 @@ define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1213,7 +1213,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1224,7 +1224,7 @@ define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1257,7 +1257,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1268,7 +1268,7 @@ define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1301,7 +1301,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1312,7 +1312,7 @@ define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1345,7 +1345,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1356,7 +1356,7 @@ define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1388,7 +1388,7 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1398,9 +1398,9 @@ define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -1416,7 +1416,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1427,7 +1427,7 @@ define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1457,7 +1457,7 @@ define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1485,7 +1485,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1496,7 +1496,7 @@ define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1530,7 +1530,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1541,7 +1541,7 @@ define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1575,7 +1575,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1586,7 +1586,7 @@ define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1620,7 +1620,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1631,7 +1631,7 @@ define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1665,7 +1665,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1676,7 +1676,7 @@ define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1710,7 +1710,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1721,7 +1721,7 @@ define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1755,7 +1755,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1766,7 +1766,7 @@ define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1800,7 +1800,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1811,7 +1811,7 @@ define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1867,7 +1867,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1878,7 +1878,7 @@ define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1911,7 +1911,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1922,7 +1922,7 @@ define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1955,7 +1955,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1966,7 +1966,7 @@ define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -1999,7 +1999,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2010,7 +2010,7 @@ define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2043,7 +2043,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2054,7 +2054,7 @@ define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2087,7 +2087,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2098,7 +2098,7 @@ define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2131,7 +2131,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2142,7 +2142,7 @@ define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2176,7 +2176,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2187,7 +2187,7 @@ define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2221,7 +2221,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2232,7 +2232,7 @@ define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2266,7 +2266,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2277,7 +2277,7 @@ define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2311,7 +2311,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2322,7 +2322,7 @@ define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2354,7 +2354,7 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2364,9 +2364,9 @@ define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -2382,7 +2382,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2393,7 +2393,7 @@ define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2423,7 +2423,7 @@ define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2451,7 +2451,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2462,7 +2462,7 @@ define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2496,7 +2496,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2507,7 +2507,7 @@ define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2541,7 +2541,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2552,7 +2552,7 @@ define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2586,7 +2586,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2597,7 +2597,7 @@ define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2631,7 +2631,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2642,7 +2642,7 @@ define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2674,7 +2674,7 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2684,9 +2684,9 @@ define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -2702,7 +2702,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2713,7 +2713,7 @@ define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2743,7 +2743,7 @@ define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2792,7 +2792,7 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2802,9 +2802,9 @@ define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -2819,7 +2819,7 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2829,9 +2829,9 @@ define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
 ;
@@ -2846,7 +2846,7 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2856,9 +2856,9 @@ define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
@@ -2873,7 +2873,7 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2883,9 +2883,9 @@ define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -2900,7 +2900,7 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2910,9 +2910,9 @@ define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -2927,7 +2927,7 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2937,9 +2937,9 @@ define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -2954,7 +2954,7 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2964,9 +2964,9 @@ define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -2981,7 +2981,7 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -2991,9 +2991,9 @@ define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -3008,7 +3008,7 @@ define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3035,7 +3035,7 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3045,9 +3045,9 @@ define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -3062,7 +3062,7 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3072,9 +3072,9 @@ define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
 ;
@@ -3089,7 +3089,7 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3099,9 +3099,9 @@ define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
@@ -3116,7 +3116,7 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3126,9 +3126,9 @@ define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -3143,7 +3143,7 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3153,9 +3153,9 @@ define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -3170,7 +3170,7 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3180,9 +3180,9 @@ define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -3197,7 +3197,7 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3207,9 +3207,9 @@ define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -3224,7 +3224,7 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3234,9 +3234,9 @@ define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -3251,7 +3251,7 @@ define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3278,7 +3278,7 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3288,9 +3288,9 @@ define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -3305,7 +3305,7 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3315,9 +3315,9 @@ define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
 ;
@@ -3332,7 +3332,7 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3342,9 +3342,9 @@ define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
@@ -3359,7 +3359,7 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3369,9 +3369,9 @@ define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -3386,7 +3386,7 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3396,9 +3396,9 @@ define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -3413,7 +3413,7 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3423,9 +3423,9 @@ define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -3440,7 +3440,7 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3450,9 +3450,9 @@ define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -3467,7 +3467,7 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3477,9 +3477,9 @@ define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -3494,7 +3494,7 @@ define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3521,7 +3521,7 @@ define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3560,7 +3560,7 @@ define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3587,7 +3587,7 @@ define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3614,7 +3614,7 @@ define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3642,7 +3642,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3653,7 +3653,7 @@ define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3685,7 +3685,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3696,7 +3696,7 @@ define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3728,7 +3728,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3739,7 +3739,7 @@ define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3774,7 +3774,7 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3784,9 +3784,9 @@ define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -3804,7 +3804,7 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3814,9 +3814,9 @@ define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -3834,7 +3834,7 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3844,9 +3844,9 @@ define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
@@ -3865,7 +3865,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3876,7 +3876,7 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3886,9 +3886,9 @@ define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -3911,7 +3911,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3922,7 +3922,7 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3932,9 +3932,9 @@ define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -3957,7 +3957,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3968,7 +3968,7 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -3978,9 +3978,9 @@ define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -4021,7 +4021,7 @@ define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4048,7 +4048,7 @@ define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4075,7 +4075,7 @@ define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4104,7 +4104,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4115,7 +4115,7 @@ define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4147,7 +4147,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4158,7 +4158,7 @@ define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4190,7 +4190,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4201,7 +4201,7 @@ define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4252,7 +4252,7 @@ define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4279,7 +4279,7 @@ define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4306,7 +4306,7 @@ define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4334,7 +4334,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4345,7 +4345,7 @@ define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4377,7 +4377,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4388,7 +4388,7 @@ define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4420,7 +4420,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4431,7 +4431,7 @@ define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4482,7 +4482,7 @@ define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4509,7 +4509,7 @@ define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4536,7 +4536,7 @@ define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4564,7 +4564,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4575,7 +4575,7 @@ define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4607,7 +4607,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4618,7 +4618,7 @@ define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4650,7 +4650,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4661,7 +4661,7 @@ define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4712,7 +4712,7 @@ define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4739,7 +4739,7 @@ define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4766,7 +4766,7 @@ define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4794,7 +4794,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4805,7 +4805,7 @@ define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4837,7 +4837,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4848,7 +4848,7 @@ define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4880,7 +4880,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4891,7 +4891,7 @@ define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4942,7 +4942,7 @@ define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4969,7 +4969,7 @@ define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -4996,7 +4996,7 @@ define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5024,7 +5024,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5035,7 +5035,7 @@ define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5067,7 +5067,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5078,7 +5078,7 @@ define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5110,7 +5110,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5121,7 +5121,7 @@ define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5172,7 +5172,7 @@ define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5199,7 +5199,7 @@ define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5226,7 +5226,7 @@ define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5254,7 +5254,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5265,7 +5265,7 @@ define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5297,7 +5297,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5308,7 +5308,7 @@ define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5340,7 +5340,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5351,7 +5351,7 @@ define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5387,7 +5387,7 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5399,9 +5399,9 @@ define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
 ; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -5417,7 +5417,7 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5429,9 +5429,9 @@ define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5447,7 +5447,7 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5459,9 +5459,9 @@ define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -5477,7 +5477,7 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5487,13 +5487,13 @@ define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
 ; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -5510,7 +5510,7 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5520,13 +5520,13 @@ define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5543,7 +5543,7 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5553,13 +5553,13 @@ define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -5583,7 +5583,7 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5595,9 +5595,9 @@ define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -5613,7 +5613,7 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5648,7 +5648,7 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5660,9 +5660,9 @@ define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sani
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5678,7 +5678,7 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5688,9 +5688,9 @@ define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -5705,7 +5705,7 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5717,9 +5717,9 @@ define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5736,7 +5736,7 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5748,9 +5748,9 @@ define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5765,9 +5765,9 @@ define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory {
 ; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -5781,7 +5781,7 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5793,9 +5793,9 @@ define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -5811,7 +5811,7 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5823,9 +5823,9 @@ define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_mem
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -5841,7 +5841,7 @@ define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5871,7 +5871,7 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5883,9 +5883,9 @@ define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
 ; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -5901,7 +5901,7 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5913,9 +5913,9 @@ define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -5938,7 +5938,7 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5948,9 +5948,9 @@ define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
@@ -5965,7 +5965,7 @@ define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitiz
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -5992,7 +5992,7 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6002,9 +6002,9 @@ define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -2))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -2))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 ;
@@ -6019,7 +6019,7 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6031,9 +6031,9 @@ define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[_MSPROP]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -6049,7 +6049,7 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6061,9 +6061,9 @@ define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6079,7 +6079,7 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6091,9 +6091,9 @@ define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6109,7 +6109,7 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6121,9 +6121,9 @@ define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memo
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSPROP]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6138,9 +6138,9 @@ define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory {
 ; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> zeroinitializer, <4 x i32> splat (i32 2))
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> splat (i32 2))
 ; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6154,7 +6154,7 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6164,9 +6164,9 @@ define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6181,7 +6181,7 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6193,9 +6193,9 @@ define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSPROP]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6211,7 +6211,7 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6223,9 +6223,9 @@ define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_me
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[_MSPROP]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <1 x i64> [[TMP3]]
 ;
@@ -6241,7 +6241,7 @@ define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6271,7 +6271,7 @@ define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6300,9 +6300,9 @@ define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory {
 ; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold(
 ; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> zeroinitializer, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6316,7 +6316,7 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6326,9 +6326,9 @@ define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 2, i64 2>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 2))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 2, i64 2>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> splat (i64 2))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6343,7 +6343,7 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6353,13 +6353,13 @@ define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1), <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], splat (i16 1)
 ; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -6376,7 +6376,7 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6386,13 +6386,13 @@ define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6409,7 +6409,7 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6419,13 +6419,13 @@ define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1), <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6442,7 +6442,7 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6452,9 +6452,9 @@ define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -6469,7 +6469,7 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6479,9 +6479,9 @@ define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
 ;
@@ -6496,7 +6496,7 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6506,9 +6506,9 @@ define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
@@ -6523,7 +6523,7 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6533,9 +6533,9 @@ define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -6550,7 +6550,7 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6560,9 +6560,9 @@ define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -6577,7 +6577,7 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6587,9 +6587,9 @@ define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6604,7 +6604,7 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6614,9 +6614,9 @@ define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6631,7 +6631,7 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6641,9 +6641,9 @@ define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -6658,7 +6658,7 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6668,9 +6668,9 @@ define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 8))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 8))
 ; CHECK-NEXT:    store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
 ;
@@ -6685,7 +6685,7 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6695,9 +6695,9 @@ define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i16> [[TMP3]]
 ;
@@ -6712,7 +6712,7 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6722,9 +6722,9 @@ define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
@@ -6739,7 +6739,7 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6749,9 +6749,9 @@ define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 1))
 ; CHECK-NEXT:    store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -6766,7 +6766,7 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6776,9 +6776,9 @@ define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -6793,7 +6793,7 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6803,9 +6803,9 @@ define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 1))
 ; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
@@ -6820,7 +6820,7 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6830,9 +6830,9 @@ define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP7:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
@@ -6848,7 +6848,7 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6858,11 +6858,11 @@ define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6891,7 +6891,7 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6901,11 +6901,11 @@ define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6934,7 +6934,7 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6944,11 +6944,11 @@ define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6977,7 +6977,7 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -6987,11 +6987,11 @@ define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7020,7 +7020,7 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7030,11 +7030,11 @@ define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7063,7 +7063,7 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7073,11 +7073,11 @@ define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7106,7 +7106,7 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7116,11 +7116,11 @@ define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7149,7 +7149,7 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7159,11 +7159,11 @@ define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7192,7 +7192,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7206,7 +7206,7 @@ define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7235,7 +7235,7 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7245,11 +7245,11 @@ define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7278,7 +7278,7 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7288,11 +7288,11 @@ define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7321,7 +7321,7 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7331,11 +7331,11 @@ define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7364,7 +7364,7 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7374,11 +7374,11 @@ define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> splat (i8 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7407,7 +7407,7 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7417,11 +7417,11 @@ define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> splat (i16 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7450,7 +7450,7 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7460,11 +7460,11 @@ define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7493,7 +7493,7 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7503,11 +7503,11 @@ define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7536,7 +7536,7 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7546,11 +7546,11 @@ define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> splat (i64 -1))
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7579,7 +7579,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7593,7 +7593,7 @@ define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP8]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
 ; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP3]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7622,7 +7622,7 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7632,11 +7632,11 @@ define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7665,7 +7665,7 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7675,11 +7675,11 @@ define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7708,7 +7708,7 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7718,11 +7718,11 @@ define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7751,7 +7751,7 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7761,11 +7761,11 @@ define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7794,7 +7794,7 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7804,11 +7804,11 @@ define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7837,7 +7837,7 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7847,11 +7847,11 @@ define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7880,7 +7880,7 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7890,11 +7890,11 @@ define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7923,7 +7923,7 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7933,11 +7933,11 @@ define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], <i64 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7966,7 +7966,7 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -7976,11 +7976,11 @@ define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8009,7 +8009,7 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8019,11 +8019,11 @@ define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8052,7 +8052,7 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8062,11 +8062,11 @@ define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8095,7 +8095,7 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8105,11 +8105,11 @@ define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8138,7 +8138,7 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8148,11 +8148,11 @@ define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8181,7 +8181,7 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8191,11 +8191,11 @@ define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8224,7 +8224,7 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8234,11 +8234,11 @@ define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 1)
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8267,7 +8267,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8278,7 +8278,7 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8288,11 +8288,11 @@ define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], splat (i8 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
@@ -8316,7 +8316,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8327,7 +8327,7 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8337,11 +8337,11 @@ define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
@@ -8365,7 +8365,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8376,7 +8376,7 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8386,11 +8386,11 @@ define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
@@ -8414,7 +8414,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8425,7 +8425,7 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8435,11 +8435,11 @@ define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], splat (i8 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
@@ -8463,7 +8463,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8474,7 +8474,7 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8484,11 +8484,11 @@ define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
@@ -8512,7 +8512,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8523,7 +8523,7 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8533,11 +8533,11 @@ define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
@@ -8561,7 +8561,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8572,7 +8572,7 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8582,11 +8582,11 @@ define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
@@ -8610,7 +8610,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8621,7 +8621,7 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8631,11 +8631,11 @@ define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], splat (i8 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], splat (i8 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
@@ -8659,7 +8659,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8670,7 +8670,7 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8680,11 +8680,11 @@ define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], splat (i16 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
@@ -8708,7 +8708,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8719,7 +8719,7 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8729,11 +8729,11 @@ define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
@@ -8757,7 +8757,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8768,7 +8768,7 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8778,11 +8778,11 @@ define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], splat (i8 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
@@ -8806,7 +8806,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8817,7 +8817,7 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8827,11 +8827,11 @@ define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
@@ -8855,7 +8855,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8866,7 +8866,7 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8876,11 +8876,11 @@ define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
@@ -8904,7 +8904,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8915,7 +8915,7 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -8925,11 +8925,11 @@ define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
 ; CHECK-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
 ; CHECK-NEXT:    [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 1)
+; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], splat (i64 -1)
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
@@ -8953,9 +8953,9 @@ define <8 x i16> @shll(<8 x i8> %in) sanitize_memory {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
 ; CHECK-NEXT:    [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = shl <8 x i16> [[EXT]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[RES:%.*]] = shl <8 x i16> [[EXT]], splat (i16 8)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[RES]]
 ;
@@ -8969,13 +8969,13 @@ define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory {
 ; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> splat (i16 -1), <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
 ; CHECK-NEXT:    [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = shl <4 x i32> [[EXT]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[RES:%.*]] = shl <4 x i32> [[EXT]], splat (i32 16)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[RES]]
 ;
@@ -8992,7 +8992,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9003,7 +9003,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9018,7 +9018,7 @@ define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9040,7 +9040,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9051,7 +9051,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9066,7 +9066,7 @@ define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9088,7 +9088,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9099,7 +9099,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9114,7 +9114,7 @@ define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9136,7 +9136,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9147,7 +9147,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9162,7 +9162,7 @@ define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9184,7 +9184,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9195,7 +9195,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9210,7 +9210,7 @@ define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9232,7 +9232,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9243,7 +9243,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9258,7 +9258,7 @@ define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9280,7 +9280,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9291,7 +9291,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9306,7 +9306,7 @@ define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9328,7 +9328,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9339,7 +9339,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9354,7 +9354,7 @@ define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
 ; CHECK:       15:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9411,7 +9411,7 @@ define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9444,7 +9444,7 @@ define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9477,7 +9477,7 @@ define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9510,7 +9510,7 @@ define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanit
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9543,7 +9543,7 @@ define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sani
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9576,7 +9576,7 @@ define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9609,7 +9609,7 @@ define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) saniti
 ; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT:    [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
@@ -9675,7 +9675,7 @@ define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[B:%.*]] = add <4 x i32> [[A]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[B:%.*]] = add <4 x i32> [[A]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[_MSPROP]], i32 13)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13)
@@ -9690,5 +9690,5 @@ entry:
 
 declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
 ;.
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
 ;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll
index 421f00fcbc56bf..deeb1d4b6ff859 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_vst.ll
@@ -24,7 +24,7 @@ define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_memory {
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    unreachable
@@ -47,9 +47,9 @@ define void @st2_8b_undefA(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -72,9 +72,9 @@ define void @st2_8b_undefB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_m
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -96,9 +96,9 @@ define void @st2_8b_undefAB(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind sanitize_
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -125,7 +125,7 @@ define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind sani
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -149,9 +149,9 @@ define void @st3_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -175,9 +175,9 @@ define void @st3_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -201,9 +201,9 @@ define void @st3_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwi
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -226,9 +226,9 @@ define void @st3_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -251,9 +251,9 @@ define void @st3_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -276,9 +276,9 @@ define void @st3_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounw
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -300,9 +300,9 @@ define void @st3_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) noun
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -330,7 +330,7 @@ define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P)
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -355,9 +355,9 @@ define void @st4_8b_undefA(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -382,9 +382,9 @@ define void @st4_8b_undefB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -409,9 +409,9 @@ define void @st4_8b_undefC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -436,9 +436,9 @@ define void @st4_8b_undefD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, p
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> splat (i8 -1), ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -462,9 +462,9 @@ define void @st4_8b_undefAB(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -488,9 +488,9 @@ define void @st4_8b_undefAC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -514,9 +514,9 @@ define void @st4_8b_undefBC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -540,9 +540,9 @@ define void @st4_8b_undefBD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP3]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> [[TMP3]], <8 x i8> splat (i8 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -565,9 +565,9 @@ define void @st4_8b_undefABC(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -590,9 +590,9 @@ define void @st4_8b_undefABD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -615,9 +615,9 @@ define void @st4_8b_undefACD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -640,9 +640,9 @@ define void @st4_8b_undefBCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D,
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -664,9 +664,9 @@ define void @st4_8b_undefABCD(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -698,7 +698,7 @@ define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind sanitize_memor
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -725,7 +725,7 @@ define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -753,7 +753,7 @@ define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -785,7 +785,7 @@ define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -812,7 +812,7 @@ define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -840,7 +840,7 @@ define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -872,7 +872,7 @@ define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -899,7 +899,7 @@ define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -927,7 +927,7 @@ define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -959,7 +959,7 @@ define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -986,7 +986,7 @@ define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1014,7 +1014,7 @@ define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1044,7 +1044,7 @@ define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1071,7 +1071,7 @@ define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1099,7 +1099,7 @@ define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1132,7 +1132,7 @@ define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1159,7 +1159,7 @@ define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1187,7 +1187,7 @@ define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <1 x i64> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1219,7 +1219,7 @@ define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize_memory
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1242,9 +1242,9 @@ define void @st2_2d_undefA(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1267,9 +1267,9 @@ define void @st2_2d_undefB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitize
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1291,9 +1291,9 @@ define void @st2_2d_undefAB(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind sanitiz
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1320,7 +1320,7 @@ define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind s
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1344,9 +1344,9 @@ define void @st3_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1370,9 +1370,9 @@ define void @st3_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1396,9 +1396,9 @@ define void @st3_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nou
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1421,9 +1421,9 @@ define void @st3_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1446,9 +1446,9 @@ define void @st3_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1471,9 +1471,9 @@ define void @st3_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) no
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1495,9 +1495,9 @@ define void @st3_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) n
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1525,7 +1525,7 @@ define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr
 ; CHECK-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
 ; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]], ptr [[TMP8]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1554,9 +1554,9 @@ define void @st4_2d_undefA(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1581,9 +1581,9 @@ define void @st4_2d_undefB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1608,9 +1608,9 @@ define void @st4_2d_undefC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP4]], ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP4]], ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1635,9 +1635,9 @@ define void @st4_2d_undefD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %
 ; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP7]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i64> splat (i64 -1), ptr [[TMP7]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1661,9 +1661,9 @@ define void @st4_2d_undefAB(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1687,9 +1687,9 @@ define void @st4_2d_undefAC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1713,9 +1713,9 @@ define void @st4_2d_undefAD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1739,9 +1739,9 @@ define void @st4_2d_undefBC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP3]], ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1765,9 +1765,9 @@ define void @st4_2d_undefBD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1791,9 +1791,9 @@ define void @st4_2d_undefCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
 ; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP6]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP6]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1816,9 +1816,9 @@ define void @st4_2d_undefABC(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1841,9 +1841,9 @@ define void @st4_2d_undefABD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1866,9 +1866,9 @@ define void @st4_2d_undefACD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1891,9 +1891,9 @@ define void @st4_2d_undefBCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
 ; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP5]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP2]], <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP5]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1915,9 +1915,9 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 193514046488576
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
-; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 -1, i64 -1>, ptr [[TMP4]])
+; CHECK-NEXT:    call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), ptr [[TMP4]])
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR4]]
 ; CHECK-NEXT:    unreachable
@@ -1929,5 +1929,5 @@ define void @st4_2d_undefABCD(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64
   ret void
 }
 ;.
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
 ;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index 3d5e4005de5b10..5bb2c3f0e233b9 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -43,9 +43,9 @@ define <4 x double> @test_x86_avx_blendv_pd_256(<4 x double> %a0, <4 x double> %
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x double> [[A2:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP5:%.*]] = ashr <4 x i64> [[TMP4]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
-; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i64> [[TMP1]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP2]], <4 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x double> [[A1:%.*]] to <4 x i64>
@@ -71,9 +71,9 @@ define <8 x float> @test_x86_avx_blendv_ps_256(<8 x float> %a0, <8 x float> %a1,
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x float> [[A2:%.*]] to <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP5:%.*]] = ashr <8 x i32> [[TMP4]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <8 x i32> [[TMP5]] to <8 x i1>
-; CHECK-NEXT:    [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr <8 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP2]], <8 x i32> [[TMP3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x float> [[A1:%.*]] to <8 x i32>
@@ -305,7 +305,7 @@ define <4 x float> @test_x86_avx_cvt_pd2_ps_256(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -326,7 +326,7 @@ define <4 x i32> @test_x86_avx_cvt_pd2dq_256(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -347,7 +347,7 @@ define <8 x i32> @test_x86_avx_cvt_ps2dq_256(<8 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -368,7 +368,7 @@ define <4 x i32> @test_x86_avx_cvtt_pd2dq_256(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -389,7 +389,7 @@ define <8 x i32> @test_x86_avx_cvtt_ps2dq_256(<8 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -503,7 +503,7 @@ define <32 x i8> @test_x86_avx_ldu_dq_256(ptr %a0) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -527,7 +527,7 @@ define <2 x double> @test_x86_avx_maskload_pd(ptr %a0, <2 x i64> %mask) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -551,7 +551,7 @@ define <4 x double> @test_x86_avx_maskload_pd_256(ptr %a0, <4 x i64> %mask) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -575,7 +575,7 @@ define <4 x float> @test_x86_avx_maskload_ps(ptr %a0, <4 x i32> %mask) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -599,7 +599,7 @@ define <8 x float> @test_x86_avx_maskload_ps_256(ptr %a0, <8 x i32> %mask) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -627,7 +627,7 @@ define void @test_x86_avx_maskstore_pd(ptr %a0, <2 x i64> %mask, <2 x double> %a
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -654,7 +654,7 @@ define void @test_x86_avx_maskstore_pd_256(ptr %a0, <4 x i64> %mask, <4 x double
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -681,7 +681,7 @@ define void @test_x86_avx_maskstore_ps(ptr %a0, <4 x i32> %mask, <4 x float> %a2
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -708,7 +708,7 @@ define void @test_x86_avx_maskstore_ps_256(ptr %a0, <8 x i32> %mask, <8 x float>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -792,7 +792,7 @@ define i32 @test_x86_avx_movmsk_pd_256(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -813,7 +813,7 @@ define i32 @test_x86_avx_movmsk_ps_256(<8 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -905,7 +905,7 @@ define <4 x double> @test_x86_avx_round_pd_256(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -926,7 +926,7 @@ define <8 x float> @test_x86_avx_round_ps_256(<8 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -964,7 +964,7 @@ define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -989,7 +989,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1009,7 +1009,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i64> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1032,7 +1032,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1050,7 +1050,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1065,7 +1065,7 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1091,7 +1091,7 @@ define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1363,11 +1363,11 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i64> [[A1:%.*]], <i64 1, i64 1>
-; CHECK-NEXT:    [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> <i64 -1, i64 -1>, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i64> [[A1:%.*]], splat (i64 1)
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = shufflevector <2 x i64> [[_MSPROP]], <2 x i64> splat (i64 -1), <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[A3:%.*]] = shufflevector <2 x i64> [[A2]], <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1376,7 +1376,7 @@ define void @movnt_dq(ptr %p, <2 x i64> %a1) nounwind #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    store <4 x i64> [[_MSPROP1]], ptr [[TMP7]], align 32
-; CHECK-NEXT:    store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META1:![0-9]+]]
+; CHECK-NEXT:    store <4 x i64> [[A3]], ptr [[P]], align 32, !nontemporal [[META2:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   %a2 = add <2 x i64> %a1, <i64 1, i64 1>
@@ -1392,7 +1392,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1401,7 +1401,7 @@ define void @movnt_ps(ptr %p, <8 x float> %a) nounwind #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[TMP7]], align 32
-; CHECK-NEXT:    store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META1]]
+; CHECK-NEXT:    store <8 x float> [[A:%.*]], ptr [[P]], align 32, !nontemporal [[META2]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @llvm.x86.avx.movnt.ps.256(ptr %p, <8 x float> %a) nounwind
@@ -1418,7 +1418,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 {
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[A2:%.*]] = fadd <4 x double> [[A1:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -1427,7 +1427,7 @@ define void @movnt_pd(ptr %p, <4 x double> %a1) nounwind #0 {
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    store <4 x i64> [[_MSPROP]], ptr [[TMP7]], align 32
-; CHECK-NEXT:    store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META1]]
+; CHECK-NEXT:    store <4 x double> [[A2]], ptr [[P]], align 32, !nontemporal [[META2]]
 ; CHECK-NEXT:    ret void
 ;
   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index 5efb7eb4078987..1602e85d8516d2 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -160,7 +160,7 @@ define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <32 x i8> [[TMP1]] to i256
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
 ; CHECK-NEXT:    unreachable
@@ -216,7 +216,7 @@ define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], <i64 48, i64 48, i64 48, i64 48>
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr <4 x i64> [[TMP6]], splat (i64 48)
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
 ; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i64> [[RES]]
@@ -489,7 +489,7 @@ define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -684,7 +684,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -797,7 +797,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -817,7 +817,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -832,7 +832,7 @@ define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <32 x i8> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i256 [[TMP9]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -885,9 +885,9 @@ define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8>
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <32 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr <32 x i8> [[A2:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <32 x i8> [[TMP4]] to <32 x i1>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <32 x i8> [[TMP1]], splat (i8 7)
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <32 x i8> [[TMP6]] to <32 x i1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> [[TMP2]], <32 x i8> [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <32 x i8> [[A1:%.*]], [[A0:%.*]]
@@ -978,7 +978,7 @@ define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1002,7 +1002,7 @@ define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1026,7 +1026,7 @@ define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1050,7 +1050,7 @@ define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1074,7 +1074,7 @@ define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP3]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
 ; CHECK:       4:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1102,7 +1102,7 @@ define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) #0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1129,7 +1129,7 @@ define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i64> [[TMP3]] to i256
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1156,7 +1156,7 @@ define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) #0
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1183,7 +1183,7 @@ define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i32> [[TMP3]] to i256
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i256 [[TMP5]], 0
 ; CHECK-NEXT:    [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
 ; CHECK:       6:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1438,7 +1438,7 @@ define <2 x i64> @test_x86_avx2_psrlv_q_const() #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 -1>)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> splat (i64 4), <2 x i64> <i64 1, i64 -1>)
 ; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
 ;
@@ -1471,7 +1471,7 @@ define <4 x i64> @test_x86_avx2_psrlv_q_256_const() #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> splat (i64 4), <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
 ; CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i64> [[RES]]
 ;
@@ -1560,7 +1560,7 @@ define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1593,7 +1593,7 @@ define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1626,7 +1626,7 @@ define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1659,7 +1659,7 @@ define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1692,7 +1692,7 @@ define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1725,7 +1725,7 @@ define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1758,7 +1758,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1791,7 +1791,7 @@ define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1824,7 +1824,7 @@ define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %id
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1857,7 +1857,7 @@ define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1890,7 +1890,7 @@ define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %id
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1923,7 +1923,7 @@ define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i64> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1956,7 +1956,7 @@ define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %id
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -1989,7 +1989,7 @@ define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -2022,7 +2022,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %id
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -2055,7 +2055,7 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -2089,14 +2089,14 @@ define <8 x float>  @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i32> [[TMP4]] to i256
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; CHECK:       9:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       10:
 ; CHECK-NEXT:    [[RES:%.*]] = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> [[A0:%.*]], ptr [[A:%.*]], <8 x i32> [[IDX:%.*]], <8 x float> [[MASK:%.*]], i8 4)
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
 ; CHECK:       11:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
@@ -2136,7 +2136,7 @@ define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSPROP]] to i128
 ; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP7]], 0
 ; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
index e9323f6dd33083..9d075f7974cd91 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
@@ -162,9 +162,9 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
@@ -183,9 +183,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
@@ -206,9 +206,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       3:
 ; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
@@ -218,9 +218,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       9:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
@@ -243,9 +243,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
@@ -263,9 +263,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext(<2 x double> %a0) nounwind #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0:%.*]])
@@ -283,9 +283,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       3:
 ; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
@@ -295,9 +295,9 @@ define <4 x float> @test_x86_sse2_cvtpd2ps_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       9:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> [[A0]])
@@ -317,9 +317,9 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
@@ -338,9 +338,9 @@ define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[A0:%.*]])
@@ -361,9 +361,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) #0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       6:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1:%.*]])
@@ -382,9 +382,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
@@ -395,9 +395,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load(<4 x float> %a0, ptr %p1) #0 {
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       11:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
@@ -416,9 +416,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[A1:%.*]] = load <2 x double>, ptr [[P1:%.*]], align 16
@@ -429,9 +429,9 @@ define <4 x float> @test_x86_sse2_cvtsd2ss_load_optsize(<4 x float> %a0, ptr %p1
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[_MSLD]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       11:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> [[A0:%.*]], <2 x double> [[A1]])
@@ -450,9 +450,9 @@ define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
@@ -471,9 +471,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext(<2 x double> %a0) nounwind #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0:%.*]])
@@ -494,9 +494,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       3:
 ; CHECK-NEXT:    [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
@@ -506,9 +506,9 @@ define <2 x i64> @test_mm_cvttpd_epi32_zext_load(ptr %p0) nounwind #0 {
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; CHECK:       8:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       9:
 ; CHECK-NEXT:    [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> [[A0]])
@@ -531,9 +531,9 @@ define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> [[A0:%.*]])
@@ -552,9 +552,9 @@ define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[A0:%.*]])
@@ -639,9 +639,9 @@ define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> [[A0:%.*]])
@@ -810,9 +810,9 @@ define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> [[A0:%.*]])
@@ -866,7 +866,7 @@ define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], <i64 48, i64 48>
+; CHECK-NEXT:    [[TMP7:%.*]] = lshr <2 x i64> [[TMP6]], splat (i64 48)
 ; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> [[A0:%.*]], <16 x i8> [[A1:%.*]])
 ; CHECK-NEXT:    store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
@@ -1139,9 +1139,9 @@ define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, ptr %p) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       4:
 ; CHECK-NEXT:    [[A1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 16
@@ -1365,9 +1365,9 @@ define void @clflush(ptr %p) nounwind #0 {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
 ; CHECK:       2:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR5]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       3:
 ; CHECK-NEXT:    tail call void @llvm.x86.sse2.clflush(ptr [[P:%.*]])
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
index a71455821bd6bd..f5e8452fd0d585 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse41-intrinsics-x86.ll
@@ -11,9 +11,9 @@ define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1,
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x double> [[A2:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP5:%.*]] = ashr <2 x i64> [[TMP4]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i1>
-; CHECK-NEXT:    [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <2 x i64> [[TMP7]] to <2 x i1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <2 x double> [[A1:%.*]] to <2 x i64>
@@ -39,9 +39,9 @@ define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x float> [[A2:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP5:%.*]] = ashr <4 x i32> [[TMP4]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i1>
-; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x float> [[A1:%.*]] to <4 x i32>
@@ -112,7 +112,7 @@ define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) #0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -138,7 +138,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
 ; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -158,7 +158,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -173,7 +173,7 @@ define <8 x i16> @test_x86_sse41_mpsadbw_load_op0(ptr %ptr, <16 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
 ; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
 ; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -226,9 +226,9 @@ define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = ashr <16 x i8> [[A2:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <16 x i8> [[TMP4]] to <16 x i1>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <16 x i8> [[TMP1]], splat (i8 7)
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <16 x i8> [[TMP6]] to <16 x i1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[TMP5]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <16 x i8> [[A1:%.*]], [[A0:%.*]]
@@ -322,7 +322,7 @@ define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -343,7 +343,7 @@ define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) #0 {
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -380,7 +380,7 @@ define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, ptr %a1) #0
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
@@ -407,7 +407,7 @@ define <4 x float> @test_x86_sse41_round_ss_load(<4 x float> %a0, ptr %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
 ; CHECK:       3:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn()
 ; CHECK-NEXT:    unreachable
diff --git a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll
index 24fde53eba63a9..ff37605acaddd9 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/masked-store-load.ll
@@ -42,7 +42,7 @@ define void @Store(ptr %p, <4 x i64> %v, <4 x i1> %mask) sanitize_memory {
 ; ADDR-NEXT:    [[TMP6:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0:![0-9]+]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1:![0-9]+]]
 ; ADDR:       7:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
 ; ADDR-NEXT:    unreachable
@@ -112,7 +112,7 @@ define <4 x double> @Load(ptr %p, <4 x double> %v, <4 x i1> %mask) sanitize_memo
 ; ADDR-NEXT:    [[TMP6:%.*]] = bitcast <4 x i1> [[TMP1]] to i4
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i4 [[TMP6]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF0]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
 ; ADDR:       7:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
 ; ADDR-NEXT:    unreachable
@@ -238,7 +238,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <16 x i64> [[TMP2]], splat (i64 87960930222080)
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr <16 x i64> [[TMP3]] to <16 x ptr>
 ; CHECK-NEXT:    [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP4]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]])
 ; CHECK-NEXT:    [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]])
@@ -252,7 +252,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas
 ; ADDR-NEXT:    call void @llvm.donothing()
 ; ADDR-NEXT:    [[_MSMASKEDPTRS:%.*]] = select <16 x i1> [[MASK:%.*]], <16 x i64> [[TMP2]], <16 x i64> zeroinitializer
 ; ADDR-NEXT:    [[TMP4:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64>
-; ADDR-NEXT:    [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ADDR-NEXT:    [[TMP5:%.*]] = xor <16 x i64> [[TMP4]], splat (i64 87960930222080)
 ; ADDR-NEXT:    [[TMP6:%.*]] = inttoptr <16 x i64> [[TMP5]] to <16 x ptr>
 ; ADDR-NEXT:    [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP6]], i32 4, <16 x i1> [[MASK]], <16 x i32> [[TMP3]])
 ; ADDR-NEXT:    [[TMP7:%.*]] = bitcast <16 x i1> [[TMP1]] to i16
@@ -260,7 +260,7 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas
 ; ADDR-NEXT:    [[TMP8:%.*]] = bitcast <16 x i64> [[_MSMASKEDPTRS]] to i1024
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i1024 [[TMP8]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; ADDR:       9:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
 ; ADDR-NEXT:    unreachable
@@ -274,9 +274,9 @@ define <16 x float> @Gather(<16 x ptr> %ptrs, <16 x i1> %mask, <16 x float> %pas
 ; ORIGINS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 136) to ptr), align 4
 ; ORIGINS-NEXT:    call void @llvm.donothing()
 ; ORIGINS-NEXT:    [[TMP3:%.*]] = ptrtoint <16 x ptr> [[PTRS:%.*]] to <16 x i64>
-; ORIGINS-NEXT:    [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ORIGINS-NEXT:    [[TMP4:%.*]] = xor <16 x i64> [[TMP3]], splat (i64 87960930222080)
 ; ORIGINS-NEXT:    [[TMP5:%.*]] = inttoptr <16 x i64> [[TMP4]] to <16 x ptr>
-; ORIGINS-NEXT:    [[TMP6:%.*]] = add <16 x i64> [[TMP4]], <i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416>
+; ORIGINS-NEXT:    [[TMP6:%.*]] = add <16 x i64> [[TMP4]], splat (i64 17592186044416)
 ; ORIGINS-NEXT:    [[TMP7:%.*]] = inttoptr <16 x i64> [[TMP6]] to <16 x ptr>
 ; ORIGINS-NEXT:    [[_MSMASKEDGATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP5]], i32 4, <16 x i1> [[MASK:%.*]], <16 x i32> [[TMP1]])
 ; ORIGINS-NEXT:    [[RET:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[PTRS]], i32 4, <16 x i1> [[MASK]], <16 x float> [[PASSTHRU:%.*]])
@@ -319,7 +319,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i64> [[TMP2]], splat (i64 87960930222080)
 ; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr <8 x i64> [[TMP3]] to <8 x ptr>
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> [[MASK:%.*]])
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]])
@@ -332,7 +332,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize
 ; ADDR-NEXT:    call void @llvm.donothing()
 ; ADDR-NEXT:    [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> [[TMP2]], <8 x i64> zeroinitializer
 ; ADDR-NEXT:    [[TMP4:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; ADDR-NEXT:    [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ADDR-NEXT:    [[TMP5:%.*]] = xor <8 x i64> [[TMP4]], splat (i64 87960930222080)
 ; ADDR-NEXT:    [[TMP6:%.*]] = inttoptr <8 x i64> [[TMP5]] to <8 x ptr>
 ; ADDR-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP3]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[MASK]])
 ; ADDR-NEXT:    [[TMP7:%.*]] = bitcast <8 x i1> [[TMP1]] to i8
@@ -340,7 +340,7 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize
 ; ADDR-NEXT:    [[TMP8:%.*]] = bitcast <8 x i64> [[_MSMASKEDPTRS]] to i512
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF0]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
 ; ADDR:       9:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
 ; ADDR-NEXT:    unreachable
@@ -353,9 +353,9 @@ define void @Scatter(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask) sanitize
 ; ORIGINS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
 ; ORIGINS-NEXT:    call void @llvm.donothing()
 ; ORIGINS-NEXT:    [[TMP3:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; ORIGINS-NEXT:    [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ORIGINS-NEXT:    [[TMP4:%.*]] = xor <8 x i64> [[TMP3]], splat (i64 87960930222080)
 ; ORIGINS-NEXT:    [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr>
-; ORIGINS-NEXT:    [[TMP6:%.*]] = add <8 x i64> [[TMP4]], <i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416>
+; ORIGINS-NEXT:    [[TMP6:%.*]] = add <8 x i64> [[TMP4]], splat (i64 17592186044416)
 ; ORIGINS-NEXT:    [[TMP7:%.*]] = inttoptr <8 x i64> [[TMP6]] to <8 x ptr>
 ; ORIGINS-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP1]], <8 x ptr> [[TMP5]], i32 8, <8 x i1> [[MASK:%.*]])
 ; ORIGINS-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]])
@@ -369,7 +369,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask
 ; CHECK-LABEL: @ScatterNoSanitize(
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080)
 ; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr>
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]])
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]])
@@ -379,7 +379,7 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask
 ; ADDR-NEXT:    call void @llvm.donothing()
 ; ADDR-NEXT:    [[_MSMASKEDPTRS:%.*]] = select <8 x i1> [[MASK:%.*]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
 ; ADDR-NEXT:    [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; ADDR-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ADDR-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080)
 ; ADDR-NEXT:    [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr>
 ; ADDR-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK]])
 ; ADDR-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]])
@@ -388,9 +388,9 @@ define void @ScatterNoSanitize(<8 x i32> %value, <8 x ptr> %ptrs, <8 x i1> %mask
 ; ORIGINS-LABEL: @ScatterNoSanitize(
 ; ORIGINS-NEXT:    call void @llvm.donothing()
 ; ORIGINS-NEXT:    [[TMP1:%.*]] = ptrtoint <8 x ptr> [[PTRS:%.*]] to <8 x i64>
-; ORIGINS-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], <i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080, i64 87960930222080>
+; ORIGINS-NEXT:    [[TMP2:%.*]] = xor <8 x i64> [[TMP1]], splat (i64 87960930222080)
 ; ORIGINS-NEXT:    [[TMP3:%.*]] = inttoptr <8 x i64> [[TMP2]] to <8 x ptr>
-; ORIGINS-NEXT:    [[TMP4:%.*]] = add <8 x i64> [[TMP2]], <i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416, i64 17592186044416>
+; ORIGINS-NEXT:    [[TMP4:%.*]] = add <8 x i64> [[TMP2]], splat (i64 17592186044416)
 ; ORIGINS-NEXT:    [[TMP5:%.*]] = inttoptr <8 x i64> [[TMP4]] to <8 x ptr>
 ; ORIGINS-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> zeroinitializer, <8 x ptr> [[TMP3]], i32 8, <8 x i1> [[MASK:%.*]])
 ; ORIGINS-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[VALUE:%.*]], <8 x ptr> [[PTRS]], i32 8, <8 x i1> [[MASK]])
@@ -426,7 +426,7 @@ define <16 x float> @ExpandLoad(ptr %ptr, <16 x i1> %mask, <16 x float> %passthr
 ; ADDR-NEXT:    [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; ADDR:       8:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
 ; ADDR-NEXT:    unreachable
@@ -504,7 +504,7 @@ define void @CompressStore(<16 x float> %value, ptr %ptr, <16 x i1> %mask) sanit
 ; ADDR-NEXT:    [[TMP7:%.*]] = bitcast <16 x i1> [[TMP2]] to i16
 ; ADDR-NEXT:    [[_MSCMP1:%.*]] = icmp ne i16 [[TMP7]], 0
 ; ADDR-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; ADDR-NEXT:    br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
 ; ADDR:       8:
 ; ADDR-NEXT:    call void @__msan_warning_noreturn() #[[ATTR7]]
 ; ADDR-NEXT:    unreachable
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
index 98a9cdc6960494..335e47d915c3f0 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-track-origins-neon.ll
@@ -75,10 +75,8 @@ attributes #0 = { mustprogress noreturn nounwind sanitize_memory "no-trapping-ma
 !4 = !{!"Simple C++ TBAA"}
 !5 = distinct !{!5, !6}
 !6 = !{!"llvm.loop.mustprogress"}
-;.
 ; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
 ; CHECK: [[META1]] = !{!"omnipotent char", [[META2:![0-9]+]], i64 0}
 ; CHECK: [[META2]] = !{!"Simple C++ TBAA"}
 ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
 ; CHECK: [[META4]] = !{!"llvm.loop.mustprogress"}
-;.
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
index 2131162bf4bf3f..03c5e917f07650 100644
--- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
@@ -134,8 +134,8 @@ define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitiz
 ; CHECK-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double>
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
 ; CHECK-NEXT:    store i64 0, ptr @__nsan_shadow_args_tag, align 8
-; CHECK-NEXT:    [[B:%.*]] = fadd <2 x float> [[A]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[B:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0)
@@ -396,8 +396,8 @@ define void @load_add_store_vector(<2 x float>* %a) sanitize_numerical_stability
 ; CHECK-NEXT:    br label [[TMP6]]
 ; CHECK:       6:
 ; CHECK-NEXT:    [[TMP7:%.*]] = phi <2 x double> [ [[TMP3]], [[TMP2]] ], [ [[TMP5]], [[TMP4]] ]
-; CHECK-NEXT:    [[C:%.*]] = fadd <2 x float> [[B]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[C:%.*]] = fadd <2 x float> [[B]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP7]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP9:%.*]] = call ptr @__nsan_get_shadow_ptr_for_float_store(ptr [[A]], i64 2)
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[C]], i64 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP8]], i64 0
@@ -772,8 +772,8 @@ define void @vector_shuffle(<2 x float> %0) sanitize_numerical_stability {
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double>
 ; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]]
 ; CHECK-NEXT:    store i64 0, ptr @__nsan_shadow_args_tag, align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> splat (float 1.000000e+00), <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> splat (double 1.000000e+00), <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll
index f26463815c16d6..418ad0da26a95f 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/ARM/fptosisat.ll
@@ -200,8 +200,8 @@ define i32 @f16_i8(half %in) {
 define <2 x i64> @v2f32_i32(<2 x float> %in) {
 ; CHECK-BASE-LABEL: @v2f32_i32(
 ; CHECK-BASE-NEXT:    [[CONV:%.*]] = fptosi <2 x float> [[IN:%.*]] to <2 x i64>
-; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> <i64 2147483647, i64 2147483647>)
-; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> <i64 -2147483648, i64 -2147483648>)
+; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[CONV]], <2 x i64> splat (i64 2147483647))
+; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[MIN]], <2 x i64> splat (i64 -2147483648))
 ; CHECK-BASE-NEXT:    ret <2 x i64> [[MAX]]
 ;
 ; CHECK-MVEFP-LABEL: @v2f32_i32(
@@ -223,8 +223,8 @@ define <2 x i64> @v2f32_i32(<2 x float> %in) {
 define <4 x i64> @v4f32_i32(<4 x float> %in) {
 ; CHECK-BASE-LABEL: @v4f32_i32(
 ; CHECK-BASE-NEXT:    [[CONV:%.*]] = fptosi <4 x float> [[IN:%.*]] to <4 x i64>
-; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
-; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <4 x i64> @llvm.smin.v4i64(<4 x i64> [[CONV]], <4 x i64> splat (i64 2147483647))
+; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[MIN]], <4 x i64> splat (i64 -2147483648))
 ; CHECK-BASE-NEXT:    ret <4 x i64> [[MAX]]
 ;
 ; CHECK-MVEFP-LABEL: @v4f32_i32(
@@ -246,8 +246,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) {
 define <8 x i64> @v8f32_i32(<8 x float> %in) {
 ; CHECK-BASE-LABEL: @v8f32_i32(
 ; CHECK-BASE-NEXT:    [[CONV:%.*]] = fptosi <8 x float> [[IN:%.*]] to <8 x i64>
-; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
-; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> <i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648, i64 -2147483648>)
+; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <8 x i64> @llvm.smin.v8i64(<8 x i64> [[CONV]], <8 x i64> splat (i64 2147483647))
+; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <8 x i64> @llvm.smax.v8i64(<8 x i64> [[MIN]], <8 x i64> splat (i64 -2147483648))
 ; CHECK-BASE-NEXT:    ret <8 x i64> [[MAX]]
 ;
 ; CHECK-MVEFP-LABEL: @v8f32_i32(
@@ -269,8 +269,8 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) {
 define <4 x i32> @v4f16_i16(<4 x half> %in) {
 ; CHECK-BASE-LABEL: @v4f16_i16(
 ; CHECK-BASE-NEXT:    [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32>
-; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> splat (i32 32767))
+; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> splat (i32 -32768))
 ; CHECK-BASE-NEXT:    ret <4 x i32> [[MAX]]
 ;
 ; CHECK-MVEFP-LABEL: @v4f16_i16(
@@ -292,8 +292,8 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) {
 define <8 x i32> @v8f16_i16(<8 x half> %in) {
 ; CHECK-BASE-LABEL: @v8f16_i16(
 ; CHECK-BASE-NEXT:    [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32>
-; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+; CHECK-BASE-NEXT:    [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767))
+; CHECK-BASE-NEXT:    [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768))
 ; CHECK-BASE-NEXT:    ret <8 x i32> [[MAX]]
 ;
 ; CHECK-MVEFP-LABEL: @v8f16_i16(
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll
index 4050da245c88df..252f31ec687476 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/fptosisat.ll
@@ -159,8 +159,8 @@ define <4 x i64> @v4f32_i32(<4 x float> %in) {
 define <8 x i32> @v8f16_i16(<8 x half> %in) {
 ; CHECK-LABEL: @v8f16_i16(
 ; CHECK-NEXT:    [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32>
-; CHECK-NEXT:    [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
-; CHECK-NEXT:    [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768, i32 -32768>)
+; CHECK-NEXT:    [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> splat (i32 32767))
+; CHECK-NEXT:    [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> splat (i32 -32768))
 ; CHECK-NEXT:    ret <8 x i32> [[MAX]]
 ;
   %conv = fptosi <8 x half> %in to <8 x i32>
diff --git a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll
index 9eb7ef1a1d46ee..5c914deb089671 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/masked-cmp.ll
@@ -18,7 +18,7 @@ define i32 @anyset_two_bit_mask(i32 %x) {
 
 define <2 x i32> @anyset_two_bit_mask_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @anyset_two_bit_mask_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 9, i32 9>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 9)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
@@ -48,7 +48,7 @@ define i32 @anyset_four_bit_mask(i32 %x) {
 
 define <2 x i32> @anyset_four_bit_mask_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @anyset_four_bit_mask_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 297, i32 297>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 297)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
@@ -83,7 +83,7 @@ define i32 @anyset_three_bit_mask_all_shifted_bits(i32 %x) {
 
 define <2 x i32> @anyset_three_bit_mask_all_shifted_bits_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @anyset_three_bit_mask_all_shifted_bits_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 296, i32 296>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 296)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
@@ -114,8 +114,8 @@ define i32 @allset_two_bit_mask(i32 %x) {
 
 define <2 x i32> @allset_two_bit_mask_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @allset_two_bit_mask_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 129, i32 129>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 129, i32 129>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 129)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 129)
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
index f6369a95bccf92..4a89705e17749c 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/popcount.ll
@@ -19,7 +19,7 @@ define signext i32 @popcount8(i8 zeroext %0) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = lshr i8 [[TMP8]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nuw nsw i8 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = and i8 [[TMP10]], 15
-; CHECK-NEXT:    [[TMP12:%.*]] = zext i8 [[TMP11]] to i32 
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i8 [[TMP11]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP12]]
 ;
   %2 = lshr i8 %0, 1
@@ -32,8 +32,8 @@ define signext i32 @popcount8(i8 zeroext %0) {
   %9 = lshr i8 %8, 4
   %10 = add nuw nsw i8 %9, %8
   %11 = and i8 %10, 15
-  %12 = zext i8 %11 to i32 
-  ret i32 %12 
+  %12 = zext i8 %11 to i32
+  ret i32 %12
 }
 
 ;int popcount32(unsigned i) {
@@ -98,7 +98,7 @@ define signext i32 @popcount64(i64 %0) {
 ;  y <<= 64;
 ;  y |= 0x3333333333333333;
 ;  __uint128_t z = 0x0f0f0f0f0f0f0f0f;
-;  z <<= 64; 
+;  z <<= 64;
 ;  z |= 0x0f0f0f0f0f0f0f0f;
 ;  __uint128_t a = 0x0101010101010101;
 ;  a <<= 64;
@@ -112,7 +112,7 @@ define signext i32 @popcount64(i64 %0) {
 define signext i32 @popcount128(i128 %0) {
 ; CHECK-LABEL: @popcount128(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i128 @llvm.ctpop.i128(i128 [[TMP0:%.*]])
-; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32 
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i128 [[TMP2]] to i32
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = lshr i128 %0, 1
@@ -126,9 +126,9 @@ define signext i32 @popcount128(i128 %0) {
   %10 = add nuw nsw i128 %9, %8
   %11 = and i128 %10, 20016609818878733144904388672456953615
   %12 = mul i128 %11, 1334440654591915542993625911497130241
-  %13 = lshr i128 %12, 120 
-  %14 = trunc i128 %13 to i32 
-  ret i32 %14 
+  %13 = lshr i128 %12, 120
+  %14 = trunc i128 %13 to i32
+  ret i32 %14
 }
 
 ;vector unsigned char popcount8vec(vector unsigned char i)
@@ -140,16 +140,16 @@ define signext i32 @popcount128(i128 %0) {
 ;}
 define <16 x i8> @popcount8vec(<16 x i8> %0) {
 ; CHECK-LABEL: @popcount8vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85, i8 85>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <16 x i8> [[TMP0:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 85)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <16 x i8> [[TMP0]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i8> [[TMP4]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <16 x i8> [[TMP6]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i8> [[TMP4]], splat (i8 51)
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i8> [[TMP4]], splat (i8 2)
+; CHECK-NEXT:    [[TMP7:%.*]] = and <16 x i8> [[TMP6]], splat (i8 51)
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i8> [[TMP7]], [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr <16 x i8> [[TMP8]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nuw nsw <16 x i8> [[TMP9]], [[TMP8]]
-; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i8> [[TMP10]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i8> [[TMP10]], splat (i8 15)
 ; CHECK-NEXT:    ret <16 x i8> [[TMP11]]
 ;
   %2 = lshr <16 x i8> %0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll
index b54963eda9d1f4..0a5c710a8b70ab 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_multi_uses.ll
@@ -116,7 +116,7 @@ define void @multi_uses_sub(i32 %X, i32 %Y) {
 define void @multi_use_vec_add(<2 x i32> %X) {
 ; CHECK-LABEL: @multi_use_vec_add(
 ; CHECK-NEXT:    [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B1:%.*]] = add <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT:    [[B1:%.*]] = add <2 x i32> [[X]], splat (i32 15)
 ; CHECK-NEXT:    [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
@@ -135,7 +135,7 @@ define void @multi_use_vec_add(<2 x i32> %X) {
 define void @multi_use_vec_or(<2 x i32> %X) {
 ; CHECK-LABEL: @multi_use_vec_or(
 ; CHECK-NEXT:    [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT:    [[B1:%.*]] = or <2 x i32> [[X]], splat (i32 15)
 ; CHECK-NEXT:    [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
@@ -154,7 +154,7 @@ define void @multi_use_vec_or(<2 x i32> %X) {
 define void @multi_use_vec_xor(<2 x i32> %X) {
 ; CHECK-LABEL: @multi_use_vec_xor(
 ; CHECK-NEXT:    [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B1:%.*]] = xor <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT:    [[B1:%.*]] = xor <2 x i32> [[X]], splat (i32 15)
 ; CHECK-NEXT:    [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
@@ -173,7 +173,7 @@ define void @multi_use_vec_xor(<2 x i32> %X) {
 define void @multi_use_vec_and(<2 x i32> %X) {
 ; CHECK-LABEL: @multi_use_vec_and(
 ; CHECK-NEXT:    [[A1:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B1:%.*]] = and <2 x i32> [[X]], <i32 15, i32 15>
+; CHECK-NEXT:    [[B1:%.*]] = and <2 x i32> [[X]], splat (i32 15)
 ; CHECK-NEXT:    [[C1:%.*]] = mul <2 x i32> [[B1]], [[B1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @use32_vec(<2 x i32> [[C1]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @use64_vec(<2 x i64> [[A1]])
diff --git a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll
index 170fde4ef7002e..65595962001a43 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/vector-or-load.ll
@@ -11,7 +11,7 @@ define <8 x i16> @or-load-fixed-length-vector(ptr %p1) {
 ; CHECK-NEXT:    [[L2:%.*]] = load <8 x i8>, ptr [[P2]], align 1
 ; CHECK-NEXT:    [[E1:%.*]] = zext <8 x i8> [[L1]] to <8 x i16>
 ; CHECK-NEXT:    [[E2:%.*]] = zext <8 x i8> [[L2]] to <8 x i16>
-; CHECK-NEXT:    [[S2:%.*]] = shl <8 x i16> [[E2]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[S2:%.*]] = shl <8 x i16> [[E2]], splat (i16 8)
 ; CHECK-NEXT:    [[OR:%.*]] = or <8 x i16> [[E1]], [[S2]]
 ; CHECK-NEXT:    ret <8 x i16> [[OR]]
 ;
diff --git a/llvm/test/Transforms/Attributor/nofpclass-powi.ll b/llvm/test/Transforms/Attributor/nofpclass-powi.ll
index 176f770db0a584..1d59907a34f19a 100644
--- a/llvm/test/Transforms/Attributor/nofpclass-powi.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass-powi.ll
@@ -104,7 +104,7 @@ define float @ret_powi_f32_masked_to_even_extrabits(float %arg0, i32 %arg1) #0 {
 define <2 x float> @ret_powi_v2f32_masked_to_even(<2 x float> %arg0, <2 x i32> %arg1) #0 {
 ; CHECK-LABEL: define nofpclass(ninf nzero nsub nnorm) <2 x float> @ret_powi_v2f32_masked_to_even
 ; CHECK-SAME: (<2 x float> [[ARG0:%.*]], <2 x i32> [[ARG1:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[KNOWN_EVEN:%.*]] = and <2 x i32> [[ARG1]], splat (i32 -2)
 ; CHECK-NEXT:    [[CALL:%.*]] = call nofpclass(ninf nzero nsub nnorm) <2 x float> @llvm.powi.v2f32.v2i32(<2 x float> [[ARG0]], <2 x i32> [[KNOWN_EVEN]]) #[[ATTR6]]
 ; CHECK-NEXT:    ret <2 x float> [[CALL]]
 ;
@@ -277,7 +277,7 @@ define <4 x float> @powi_v4f32_i32_regression(<4 x float> %arg) {
 ; CHECK-LABEL: define nofpclass(nzero) <4 x float> @powi_v4f32_i32_regression
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) #[[ATTR5]] {
 ; CHECK-NEXT:    [[POWI:%.*]] = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> [[ARG]], i32 noundef 4) #[[ATTR6]]
-; CHECK-NEXT:    [[USER:%.*]] = fsub <4 x float> [[POWI]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[USER:%.*]] = fsub <4 x float> [[POWI]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    ret <4 x float> [[USER]]
 ;
   %powi = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %arg, i32 4)
diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll
index 2a6780b60211cf..5afe5e90d802bc 100644
--- a/llvm/test/Transforms/Attributor/nofpclass.ll
+++ b/llvm/test/Transforms/Attributor/nofpclass.ll
@@ -96,7 +96,7 @@ define <2 x double> @returned_zero_vector() {
 define <2 x double> @returned_negzero_vector() {
 ; CHECK-LABEL: define noundef nofpclass(nan inf pzero sub norm) <2 x double> @returned_negzero_vector() {
 ; CHECK-NEXT:    call void @unknown()
-; CHECK-NEXT:    ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double -0.000000e+00)
 ;
   call void @unknown()
   ret <2 x double> <double -0.0, double -0.0>
@@ -2909,7 +2909,7 @@ define <2 x float> @bitcast_to_float_vect_nnan(<2 x i32> %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define nofpclass(nan inf nzero nsub nnorm) <2 x float> @bitcast_to_float_vect_nnan
 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[ARG]], <i32 4, i32 4>
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 4)
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i32> [[SHR]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[CAST]]
 ;
@@ -2922,7 +2922,7 @@ define <2 x float> @bitcast_to_float_vect_sign_1(<2 x i32> %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define nofpclass(pinf pzero psub pnorm) <2 x float> @bitcast_to_float_vect_sign_1
 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 -2147483648)
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[CAST]]
 ;
@@ -2935,7 +2935,7 @@ define <2 x float> @bitcast_to_float_vect_nan(<2 x i32> %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define nofpclass(inf zero sub norm) <2 x float> @bitcast_to_float_vect_nan
 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG]], <i32 2139095041, i32 2139095041>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG]], splat (i32 2139095041)
 ; CHECK-NEXT:    [[CAST:%.*]] = bitcast <2 x i32> [[OR]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[CAST]]
 ;
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 70793ec5c7f833..24bf4938ff2d4a 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -60,9 +60,9 @@ define i32 @vec_write_3() {
 ; CHECK-LABEL: define {{[^@]+}}@vec_write_3
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:    [[A:%.*]] = alloca <4 x i32>, align 16
-; CHECK-NEXT:    store <2 x i32> <i32 3, i32 3>, ptr [[A]], align 16
+; CHECK-NEXT:    store <2 x i32> splat (i32 3), ptr [[A]], align 16
 ; CHECK-NEXT:    [[G:%.*]] = getelementptr i32, ptr [[A]], i64 1
-; CHECK-NEXT:    store <2 x i32> <i32 5, i32 5>, ptr [[G]], align 8
+; CHECK-NEXT:    store <2 x i32> splat (i32 5), ptr [[G]], align 8
 ; CHECK-NEXT:    [[J:%.*]] = getelementptr i32, ptr [[G]], i64 1
 ; CHECK-NEXT:    [[L2B:%.*]] = load i32, ptr [[G]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add i32 3, [[L2B]]
diff --git a/llvm/test/Transforms/BDCE/binops-multiuse.ll b/llvm/test/Transforms/BDCE/binops-multiuse.ll
index 0c03ca4d6fc57e..950a01bcf33ef5 100644
--- a/llvm/test/Transforms/BDCE/binops-multiuse.ll
+++ b/llvm/test/Transforms/BDCE/binops-multiuse.ll
@@ -243,8 +243,8 @@ define void @select_vectorized(i1 %c, <2 x i8> %a, <2 x i8> %b) {
 ; CHECK-SAME: i1 [[C:%.*]], <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = select i1 [[C]], <2 x i8> [[A]], <2 x i8> [[B]]
-; CHECK-NEXT:    [[RET1:%.*]] = and <2 x i8> [[S]], <i8 4, i8 4>
-; CHECK-NEXT:    [[RET2:%.*]] = and <2 x i8> [[S]], <i8 12, i8 12>
+; CHECK-NEXT:    [[RET1:%.*]] = and <2 x i8> [[S]], splat (i8 4)
+; CHECK-NEXT:    [[RET2:%.*]] = and <2 x i8> [[S]], splat (i8 12)
 ; CHECK-NEXT:    call void @use3(<2 x i8> [[RET1]])
 ; CHECK-NEXT:    call void @use3(<2 x i8> [[RET2]])
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/BDCE/dead-uses.ll b/llvm/test/Transforms/BDCE/dead-uses.ll
index 85ee0dd8f2b904..5c1677e59a8b88 100644
--- a/llvm/test/Transforms/BDCE/dead-uses.ll
+++ b/llvm/test/Transforms/BDCE/dead-uses.ll
@@ -27,11 +27,11 @@ define i32 @pr39771_fshr_multi_use_instr(i32 %a) {
 ; First fshr operand is dead (vector variant).
 define <2 x i32> @pr39771_fshr_multi_use_instr_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @pr39771_fshr_multi_use_instr_vec(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i32> [[A:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> <i32 1, i32 1>)
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 23, i32 23>
+; CHECK-NEXT:    [[X:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[B:%.*]] = tail call <2 x i32> @llvm.fshr.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X]], <2 x i32> splat (i32 1))
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 23)
 ; CHECK-NEXT:    [[D:%.*]] = xor <2 x i32> [[C]], [[B]]
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[D]], <i32 31, i32 31>
+; CHECK-NEXT:    [[E:%.*]] = and <2 x i32> [[D]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %x = or <2 x i32> %a, <i32 2, i32 2>
diff --git a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll
index ccf979d4fa12a2..8527e481600937 100644
--- a/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/BDCE/vectors-inseltpoison.ll
@@ -5,11 +5,11 @@
 
 define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_basic(
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
-; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
+; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8)
 ; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
-; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], <i32 3, i32 3>
+; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
   %a2 = add <2 x i32> %a, <i32 1, i32 1>
@@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; Going vector -> scalar
 define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_extractelement(
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
-; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
+; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8)
 ; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
 ; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0
 ; CHECK-NEXT:    [[E:%.*]] = ashr i32 [[D]], 3
@@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; Going scalar -> vector
 define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_insertelement(
-; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
+; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
 ; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> poison, i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1
-; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8)
 ; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]]
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
   %x = insertelement <2 x i32> poison, i32 %a, i32 0
@@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; Some non-int vectors and conversions
 define <2 x i32> @test_conversion(<2 x i32> %a) {
 ; CHECK-LABEL: @test_conversion(
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 2, i32 2>
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2)
 ; CHECK-NEXT:    [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double>
-; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x double> [[X]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32>
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
   %a2 = add <2 x i32> %a, <i32 1, i32 1>
@@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) {
 define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) {
 ; CHECK-LABEL: @test_assumption_invalidation(
 ; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, <i8 1, i8 1>
+; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1)
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]]
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1>
 ; CHECK-NEXT:    ret <2 x i1> [[TRUNC]]
diff --git a/llvm/test/Transforms/BDCE/vectors.ll b/llvm/test/Transforms/BDCE/vectors.ll
index ba5ac9da63b0b4..3c50f9ca5976ed 100644
--- a/llvm/test/Transforms/BDCE/vectors.ll
+++ b/llvm/test/Transforms/BDCE/vectors.ll
@@ -5,11 +5,11 @@
 
 define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_basic(
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
-; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
+; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8)
 ; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
-; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], <i32 3, i32 3>
+; CHECK-NEXT:    [[D:%.*]] = ashr <2 x i32> [[C]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
   %a2 = add <2 x i32> %a, <i32 1, i32 1>
@@ -24,9 +24,9 @@ define <2 x i32> @test_basic(<2 x i32> %a, <2 x i32> %b) {
 ; Going vector -> scalar
 define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test_extractelement(
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
-; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
+; CHECK-NEXT:    [[B2:%.*]] = add <2 x i32> [[B:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[B3:%.*]] = and <2 x i32> [[B2]], splat (i32 8)
 ; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[A3]], [[B3]]
 ; CHECK-NEXT:    [[D:%.*]] = extractelement <2 x i32> [[C]], i32 0
 ; CHECK-NEXT:    [[E:%.*]] = ashr i32 [[D]], 3
@@ -45,12 +45,12 @@ define i32 @test_extractelement(<2 x i32> %a, <2 x i32> %b) {
 ; Going scalar -> vector
 define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test_insertelement(
-; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> zeroinitializer, <i32 4, i32 4>
+; CHECK-NEXT:    [[X3:%.*]] = and <2 x i32> zeroinitializer, splat (i32 4)
 ; CHECK-NEXT:    [[Y:%.*]] = insertelement <2 x i32> undef, i32 [[B:%.*]], i32 0
 ; CHECK-NEXT:    [[Y2:%.*]] = insertelement <2 x i32> [[Y]], i32 [[A:%.*]], i32 1
-; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[Y3:%.*]] = and <2 x i32> [[Y2]], splat (i32 8)
 ; CHECK-NEXT:    [[Z:%.*]] = or <2 x i32> [[X3]], [[Y3]]
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
   %x = insertelement <2 x i32> undef, i32 %a, i32 0
@@ -67,12 +67,12 @@ define <2 x i32> @test_insertelement(i32 %a, i32 %b) {
 ; Some non-int vectors and conversions
 define <2 x i32> @test_conversion(<2 x i32> %a) {
 ; CHECK-LABEL: @test_conversion(
-; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], <i32 2, i32 2>
+; CHECK-NEXT:    [[A2:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[A3:%.*]] = and <2 x i32> [[A2]], splat (i32 2)
 ; CHECK-NEXT:    [[X:%.*]] = uitofp <2 x i32> [[A3]] to <2 x double>
-; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x double> [[X]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[Z:%.*]] = fptoui <2 x double> [[Y]] to <2 x i32>
-; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[U:%.*]] = ashr <2 x i32> [[Z]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[U]]
 ;
   %a2 = add <2 x i32> %a, <i32 1, i32 1>
@@ -88,7 +88,7 @@ define <2 x i32> @test_conversion(<2 x i32> %a) {
 define <2 x i1> @test_assumption_invalidation(<2 x i1> %b, <2 x i8> %x) {
 ; CHECK-LABEL: @test_assumption_invalidation(
 ; CHECK-NEXT:    [[LITTLE_NUMBER:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, <i8 1, i8 1>
+; CHECK-NEXT:    [[BIG_NUMBER:%.*]] = shl <2 x i8> zeroinitializer, splat (i8 1)
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i8> [[BIG_NUMBER]], [[LITTLE_NUMBER]]
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc <2 x i8> [[SUB]] to <2 x i1>
 ; CHECK-NEXT:    ret <2 x i1> [[TRUNC]]
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
index abfbf2e5e582eb..aab1926d4ef9fe 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
@@ -319,20 +319,20 @@ for.body.tail:
 define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind {
 ; CHECK-LABEL: define void @simple_urem_to_sel_vec(
 ; CHECK-SAME: <2 x i64> [[REM_AMT:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  [[FOR_COND_CLEANUP:.*]]:
+; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
-; CHECK:       [[ENTRY:.*]]:
+; CHECK:       [[FOR_COND_CLEANUP:.*]]:
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[FOR_COND_CLEANUP]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
-; CHECK-NEXT:    [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], %[[FOR_BODY]] ], [ zeroinitializer, %[[FOR_COND_CLEANUP]] ]
+; CHECK-NEXT:    [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], %[[FOR_BODY]] ], [ zeroinitializer, %[[ENTRY]] ]
 ; CHECK-NEXT:    tail call void @use.2xi64(<2 x i64> [[REM]])
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], [[REM_AMT]]
 ; CHECK-NEXT:    [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i64> zeroinitializer, <2 x i64> [[TMP1]]
-; CHECK-NEXT:    [[INC]] = add nuw <2 x i64> [[I_04]], <i64 1, i64 1>
+; CHECK-NEXT:    [[INC]] = add nuw <2 x i64> [[I_04]], splat (i64 1)
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = call i1 @get.i1()
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[ENTRY]], label %[[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
index 6d8890d71e2be9..25cd5526f89a36 100644
--- a/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
+++ b/llvm/test/Transforms/ConstantHoisting/AArch64/large-immediate.ll
@@ -123,17 +123,11 @@ define i64 @sdiv_minsize(i64 %a) minsize {
 }
 
 define <2 x i64> @sdiv_v2i64(<2 x i64> %a) {
-; CV-LABEL: define <2 x i64> @sdiv_v2i64(
-; CV-SAME: <2 x i64> [[A:%.*]]) {
-; CV-NEXT:    [[TMP1:%.*]] = sdiv <2 x i64> [[A]], <i64 4294967087, i64 4294967087>
-; CV-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[TMP1]], <i64 4294967087, i64 4294967087>
-; CV-NEXT:    ret <2 x i64> [[TMP2]]
-;
-; CI-LABEL: define <2 x i64> @sdiv_v2i64(
-; CI-SAME: <2 x i64> [[A:%.*]]) {
-; CI-NEXT:    [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087)
-; CI-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087)
-; CI-NEXT:    ret <2 x i64> [[TMP2]]
+; CHECK-LABEL: define <2 x i64> @sdiv_v2i64(
+; CHECK-SAME: <2 x i64> [[A:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i64> [[A]], splat (i64 4294967087)
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[TMP1]], splat (i64 4294967087)
+; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
   %1 = sdiv <2 x i64> %a, <i64 4294967087, i64 4294967087>
   %2 = add <2 x i64> %1, <i64 4294967087, i64 4294967087>
diff --git a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll
index 3fa0e18ec5cf2c..e87439fbaf3b08 100644
--- a/llvm/test/Transforms/ConstraintElimination/vector-compares.ll
+++ b/llvm/test/Transforms/ConstraintElimination/vector-compares.ll
@@ -19,7 +19,7 @@ define void @test_vector_iv(i32 %x, i1 %c) {
 ; CHECK-NEXT:    call void @use(i1 [[C_2]])
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp ugt i32 [[X]], 9
 ; CHECK-NEXT:    call void @use(i1 [[C_3]])
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <4 x i8> [[IV]], splat (i8 1)
 ; CHECK-NEXT:    [[E:%.*]] = extractelement <4 x i8> [[IV_NEXT]], i8 2
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i8 [[E]], 100
 ; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
index 200793918f0ef2..376bed905f4167 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -1248,8 +1248,8 @@ define i1 @non_const_range_minmax(i8 %a, i8 %b) {
 
 define <2 x i1> @non_const_range_minmax_vec(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @non_const_range_minmax_vec(
-; CHECK-NEXT:    [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 10, i8 10>)
-; CHECK-NEXT:    [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> <i8 11, i8 11>)
+; CHECK-NEXT:    [[A2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 10))
+; CHECK-NEXT:    [[B2:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[B:%.*]], <2 x i8> splat (i8 11))
 ; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
 ;
   %a2 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
index 669527ed88fa2f..5122f98f4c82fd 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/overflows.ll
@@ -715,7 +715,7 @@ define { i8, i1 } @signed_mul_constant_folding() {
 
 define { <2 x i32>, <2 x i1> } @uaddo_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @uaddo_vec(
-; CHECK-NEXT:    [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[ADD:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[ADD]]
 ;
   %add = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> <i32 1, i32 1>)
@@ -909,7 +909,7 @@ cont:
 
 define <2 x i8> @uadd_sat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_sat_vec(
-; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 1, i8 1>)
+; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1))
 ; CHECK-NEXT:    ret <2 x i8> [[ADD]]
 ;
   %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 1, i8 1>)
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
index bda270e99bc2e6..77197258422ac3 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/vectors.ll
@@ -4,7 +4,7 @@
 define <2 x i1> @cmp1(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp1(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i8> [[A]], <i8 1, i8 1>
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
 ;
   %add = add nuw <2 x i8> %a, splat (i8 1)
@@ -15,7 +15,7 @@ define <2 x i1> @cmp1(<2 x i8> %a) {
 define <2 x i1> @cmp2(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp2(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i8> [[A]], <i8 5, i8 5>
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i8> [[A]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i1> <i1 true, i1 true>
 ;
   %add = add nuw <2 x i8> %a, splat (i8 5)
@@ -51,7 +51,7 @@ define <2 x i1> @cmp_signedness(<2 x i8> %a) {
 ; CHECK-LABEL: define <2 x i1> @cmp_signedness(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], <i16 5, i16 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i16> [[ZEXT]], splat (i16 5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -63,7 +63,7 @@ define <2 x i16> @infer_nowrap(<2 x i8> %a) {
 ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @infer_nowrap(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1)
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -134,7 +134,7 @@ define <2 x i16> @saturating(<2 x i8> %a) {
 ; CHECK-LABEL: define range(i16 1, 257) <2 x i16> @saturating(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1)
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -146,7 +146,7 @@ define {<2 x i16>, <2 x i1>} @with_overflow(<2 x i8> %a) {
 ; CHECK-LABEL: define { <2 x i16>, <2 x i1> } @with_overflow(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES1:%.*]] = add nuw nsw <2 x i16> [[ZEXT]], splat (i16 1)
 ; CHECK-NEXT:    [[RES:%.*]] = insertvalue { <2 x i16>, <2 x i1> } { <2 x i16> poison, <2 x i1> zeroinitializer }, <2 x i16> [[RES1]], 0
 ; CHECK-NEXT:    ret { <2 x i16>, <2 x i1> } [[RES]]
 ;
@@ -160,7 +160,7 @@ define <2 x i16> @srem1(<2 x i8> %a) {
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
 ; CHECK-NEXT:    [[RES1_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8>
-; CHECK-NEXT:    [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], <i8 42, i8 42>
+; CHECK-NEXT:    [[RES12:%.*]] = urem <2 x i8> [[RES1_LHS_TRUNC]], splat (i8 42)
 ; CHECK-NEXT:    [[RES:%.*]] = zext <2 x i8> [[RES12]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
@@ -174,7 +174,7 @@ define <2 x i16> @srem2(<2 x i8> %a) {
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = sext <2 x i8> [[A]] to <2 x i16>
 ; CHECK-NEXT:    [[RES_LHS_TRUNC:%.*]] = trunc <2 x i16> [[ZEXT]] to <2 x i8>
-; CHECK-NEXT:    [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], <i8 42, i8 42>
+; CHECK-NEXT:    [[RES1:%.*]] = srem <2 x i8> [[RES_LHS_TRUNC]], splat (i8 42)
 ; CHECK-NEXT:    [[RES:%.*]] = sext <2 x i8> [[RES1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
@@ -187,7 +187,7 @@ define <2 x i16> @ashr(<2 x i8> %a) {
 ; CHECK-LABEL: define range(i16 0, 128) <2 x i16> @ashr(
 ; CHECK-SAME: <2 x i8> [[A:%.*]]) {
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i8> [[A]] to <2 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], <i16 1, i16 1>
+; CHECK-NEXT:    [[RES:%.*]] = lshr <2 x i16> [[ZEXT]], splat (i16 1)
 ; CHECK-NEXT:    ret <2 x i16> [[RES]]
 ;
   %zext = zext <2 x i8> %a to <2 x i16>
@@ -247,10 +247,10 @@ define <4 x i64> @issue_97674_getConstantOnEdge(i1 %cond) {
 ; CHECK-NEXT:  [[ENTRY:.*]]:
 ; CHECK-NEXT:    br i1 [[COND]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
 ; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[FOLDS:%.*]] = add nuw nsw <4 x i64> zeroinitializer, splat (i64 1)
 ; CHECK-NEXT:    br label %[[IF_END]]
 ; CHECK:       [[IF_END]]:
-; CHECK-NEXT:    [[R:%.*]] = phi <4 x i64> [ <i64 1, i64 1, i64 1, i64 1>, %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi <4 x i64> [ splat (i64 1), %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
 ; CHECK-NEXT:    ret <4 x i64> [[R]]
 ;
 entry:
diff --git a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll
index 8b257f86caf6c0..9f6c21450d040e 100644
--- a/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/X86/gather-null-pointer.ll
@@ -6,9 +6,9 @@
 define <4 x i32> @bar(<4 x i32> %arg, ptr %arg1) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store i32 5, ptr [[ARG1:%.*]]
-; CHECK-NEXT:    [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, i8 1)
-; CHECK-NEXT:    store i32 10, ptr [[ARG1]]
+; CHECK-NEXT:    store i32 5, ptr [[ARG1:%.*]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = tail call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> zeroinitializer, ptr null, <4 x i32> [[ARG:%.*]], <4 x i32> splat (i32 -1), i8 1)
+; CHECK-NEXT:    store i32 10, ptr [[ARG1]], align 4
 ; CHECK-NEXT:    ret <4 x i32> [[TMP]]
 ;
 bb:
diff --git a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
index fbab350008f4ee..bdc023128c8520 100644
--- a/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/offsetted-overlapping-stores.ll
@@ -79,7 +79,7 @@ define void @VectorTestPartiallyOverlapping(ptr %arg, i32 %i) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I2:%.*]] = zext i32 [[I:%.*]] to i64
 ; CHECK-NEXT:    [[I3:%.*]] = getelementptr inbounds float, ptr [[ARG:%.*]], i64 [[I2]]
-; CHECK-NEXT:    store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr [[I3]], align 16
+; CHECK-NEXT:    store <2 x float> splat (float 1.000000e+00), ptr [[I3]], align 16
 ; CHECK-NEXT:    [[I5:%.*]] = add nuw nsw i32 [[I]], 1
 ; CHECK-NEXT:    [[I6:%.*]] = zext i32 [[I5]] to i64
 ; CHECK-NEXT:    [[I7:%.*]] = getelementptr inbounds float, ptr [[ARG]], i64 [[I6]]
diff --git a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll
index d01ded9ebbfda8..ef2df8a3d678a9 100644
--- a/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll
+++ b/llvm/test/Transforms/DivRemPairs/AMDGPU/div-rem-pairs.ll
@@ -74,8 +74,8 @@ define i32 @poison_does_not_freeze_signed(ptr %p, i32 noundef %x, i32 noundef %y
 define <4 x i8> @poison_does_not_freeze_vector(ptr %p, <4 x i8> noundef %x, <4 x i8> noundef %y) {
 ; CHECK-LABEL: define <4 x i8> @poison_does_not_freeze_vector(
 ; CHECK-SAME: ptr [[P:%.*]], <4 x i8> noundef [[X:%.*]], <4 x i8> noundef [[Y:%.*]]) {
-; CHECK-NEXT:    [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], <i8 5, i8 5, i8 5, i8 5>
-; CHECK-NEXT:    [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[X2:%.*]] = shl nuw nsw <4 x i8> [[X]], splat (i8 5)
+; CHECK-NEXT:    [[Y2:%.*]] = add nuw nsw <4 x i8> [[Y]], splat (i8 1)
 ; CHECK-NEXT:    [[DIV:%.*]] = udiv <4 x i8> [[X2]], [[Y2]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i8> [[DIV]], [[Y2]]
 ; CHECK-NEXT:    [[REM_DECOMPOSED:%.*]] = sub <4 x i8> [[X2]], [[TMP1]]
diff --git a/llvm/test/Transforms/EarlyCSE/commute.ll b/llvm/test/Transforms/EarlyCSE/commute.ll
index 1cf7ddda7f0dd7..ecf02d037a52f5 100644
--- a/llvm/test/Transforms/EarlyCSE/commute.ll
+++ b/llvm/test/Transforms/EarlyCSE/commute.ll
@@ -485,7 +485,7 @@ define <2 x double> @select_not_cond_commute_vec(<2 x i1> %cond, <2 x double> %t
 ; CHECK-LABEL: @select_not_cond_commute_vec(
 ; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i1> [[COND:%.*]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[M1:%.*]] = select <2 x i1> [[COND]], <2 x double> [[T:%.*]], <2 x double> [[F:%.*]]
-; CHECK-NEXT:    ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
 ;
   %not = xor <2 x i1> %cond, <i1 -1, i1 -1>
   %m1 = select <2 x i1> %cond, <2 x double> %t, <2 x double> %f
@@ -833,7 +833,7 @@ define float @maxnum(float %a, float %b) {
 define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) {
 ; CHECK-LABEL: @minnum(
 ; CHECK-NEXT:    [[X:%.*]] = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]])
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %x = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
   %y = call fast <2 x float> @llvm.minnum.v2f32(<2 x float> %b, <2 x float> %a)
@@ -844,7 +844,7 @@ define <2 x float> @minnum(<2 x float> %a, <2 x float> %b) {
 define <2 x double> @maximum(<2 x double> %a, <2 x double> %b) {
 ; CHECK-LABEL: @maximum(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]])
-; CHECK-NEXT:    ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
 ;
   %x = call fast <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b)
   %y = call <2 x double> @llvm.maximum.v2f64(<2 x double> %b, <2 x double> %a)
@@ -1109,7 +1109,7 @@ define float @fma_different_add_ops(float %a, float %b, float %c, float %d) {
 define <2 x double> @fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]])
-; CHECK-NEXT:    ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
 ;
   %x = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c)
   %y = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %a, <2 x double> %c)
diff --git a/llvm/test/Transforms/EarlyCSE/gep.ll b/llvm/test/Transforms/EarlyCSE/gep.ll
index 499b5ac8de0af9..282cb94d8c5f20 100644
--- a/llvm/test/Transforms/EarlyCSE/gep.ll
+++ b/llvm/test/Transforms/EarlyCSE/gep.ll
@@ -13,7 +13,7 @@ define void @foo(ptr %a, <4 x i64> %b, i64 %i) {
 ; CHECK-NEXT:    [[N1D:%.*]] = getelementptr i8, ptr [[A]], i64 7
 ; CHECK-NEXT:    [[N1G:%.*]] = getelementptr i32, ptr [[A]], i64 1
 ; CHECK-NEXT:    [[N1H:%.*]] = getelementptr i8, ptr [[A]], i64 [[I]]
-; CHECK-NEXT:    [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[V:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> splat (i64 1)
 ; CHECK-NEXT:    call void @use_vec(<4 x ptr> [[V]])
 ; CHECK-NEXT:    [[V2:%.*]] = getelementptr i64, ptr [[A]], <4 x i64> <i64 0, i64 2, i64 1, i64 1>
 ; CHECK-NEXT:    call void @use_vec(<4 x ptr> [[V2]])
diff --git a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll
index c0afce55cb3e87..18e9dc41a6b6dd 100644
--- a/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll
+++ b/llvm/test/Transforms/GVN/non-integral-pointers-inseltpoison.ll
@@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i
 define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) {
 ; CHECK-LABEL: @neg_store_clobber(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <2 x i64> <i64 4, i64 4>, ptr addrspace(4) [[LOC:%.*]], align 16
+; CHECK-NEXT:    store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16
 ; CHECK-NEXT:    [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1
 ; CHECK-NEXT:    [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8
 ; CHECK-NEXT:    ret ptr addrspace(4) [[REF]]
@@ -369,7 +369,7 @@ entry:
 
 declare void @use.v2(<2 x ptr addrspace(4)>)
 declare void @use.v4(<4 x ptr addrspace(4)>)
- define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) {
+  define ptr addrspace(5) @multini(i1 %alwaysFalse, ptr addrspace(4) %val, ptr %loc) {
 ; CHECK-LABEL: @multini(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store ptr addrspace(4) [[VAL:%.*]], ptr [[LOC:%.*]], align 8
diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll
index 5f2aef4f443502..559d63651eb323 100644
--- a/llvm/test/Transforms/GVN/non-integral-pointers.ll
+++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll
@@ -287,7 +287,7 @@ declare void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) nocapture, ptr nocapture, i
 define ptr addrspace(4) @neg_store_clobber(ptr addrspace(4) %loc) {
 ; CHECK-LABEL: @neg_store_clobber(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <2 x i64> <i64 4, i64 4>, ptr addrspace(4) [[LOC:%.*]], align 16
+; CHECK-NEXT:    store <2 x i64> splat (i64 4), ptr addrspace(4) [[LOC:%.*]], align 16
 ; CHECK-NEXT:    [[LOC_OFF:%.*]] = getelementptr ptr addrspace(4), ptr addrspace(4) [[LOC]], i64 1
 ; CHECK-NEXT:    [[REF:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[LOC_OFF]], align 8
 ; CHECK-NEXT:    ret ptr addrspace(4) [[REF]]
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index 2f6640ce985426..8a9c81912cc935 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -48,7 +48,7 @@ define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
 define amdgpu_kernel void @vector_gep(<4 x ptr addrspace(3)> %array) nounwind {
 ; CHECK-LABEL: @vector_gep(
 ; CHECK-NEXT:    [[CAST:%.*]] = addrspacecast <4 x ptr addrspace(3)> [[ARRAY:%.*]] to <4 x ptr>
-; CHECK-NEXT:    [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:    [[P:%.*]] = getelementptr [1024 x i32], <4 x ptr> [[CAST]], <4 x i16> zeroinitializer, <4 x i16> splat (i16 16)
 ; CHECK-NEXT:    [[P0:%.*]] = extractelement <4 x ptr> [[P]], i32 0
 ; CHECK-NEXT:    [[P1:%.*]] = extractelement <4 x ptr> [[P]], i32 1
 ; CHECK-NEXT:    [[P2:%.*]] = extractelement <4 x ptr> [[P]], i32 2
diff --git a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
index 9e43aff4553044..397cb2af55c2c8 100644
--- a/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
+++ b/llvm/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
@@ -16,8 +16,8 @@ define i1 @test(i32 %tmp6) {
 
 define <2 x i1> @test_vec(<2 x i32> %tmp6) {
 ; CHECK-LABEL: @test_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], <i32 71, i32 71>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -12, i32 -12>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[TMP6:%.*]], splat (i32 71)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -12)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp7 = sdiv <2 x i32> %tmp6, <i32 12, i32 12>
diff --git a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
index fa63847cab2a2c..c723e88ad65dd9 100644
--- a/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
+++ b/llvm/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
@@ -20,8 +20,8 @@ define i16 @test1(i16 %a) {
 
 define <2 x i16> @test1_vec(<2 x i16> %a) {
 ; CHECK-LABEL: @test1_vec(
-; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
-; CHECK-NEXT:    [[D:%.*]] = mul <2 x i16> [[A]], <i16 5, i16 5>
+; CHECK-NEXT:    [[C:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8)
+; CHECK-NEXT:    [[D:%.*]] = mul <2 x i16> [[A]], splat (i16 5)
 ; CHECK-NEXT:    [[E:%.*]] = or <2 x i16> [[C]], [[D]]
 ; CHECK-NEXT:    ret <2 x i16> [[E]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
index 0038abe5e7b117..f334e42c85994a 100644
--- a/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
+++ b/llvm/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
@@ -17,7 +17,7 @@ entry:
 define <2 x i32> @a_vec(<2 x i32> %b) nounwind  {
 ; CHECK-LABEL: @a_vec(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
index f1f881b8a1bb00..d3cbd943432360 100644
--- a/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
+++ b/llvm/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
@@ -4,7 +4,7 @@
 
 define <2 x i8> @foo(<2 x i8> %x) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[A:%.*]] = srem <2 x i8> [[X:%.*]], <i8 2, i8 2>
+; CHECK-NEXT:    [[A:%.*]] = srem <2 x i8> [[X:%.*]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
   %A = srem <2 x i8> %x, <i8 2, i8 -2>
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index e4f8bb133e6ce6..779def76fc58d3 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -1155,7 +1155,7 @@ define <2 x half> @constant_cvt_pkrtz() {
 ; Test constant values where rtz changes result
 define <2 x half> @constant_rtz_pkrtz() {
 ; CHECK-LABEL: @constant_rtz_pkrtz(
-; CHECK-NEXT:    ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
+; CHECK-NEXT:    ret <2 x half> splat (half 0xH7BFF)
 ;
   %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
   ret <2 x half> %cvt
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
index 49f4729e746e86..5e2da429948812 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
@@ -158,7 +158,7 @@ define <4 x i64> @mload_v4i64(ptr %f) {
 
 define <4 x i64> @mload_v4i64_cmp(ptr %f, <4 x i64> %src) {
 ; CHECK-LABEL: @mload_v4i64_cmp(
-; CHECK-NEXT:    [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp sgt <4 x i64> [[SRC:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[LD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]], <4 x i64> zeroinitializer)
 ; CHECK-NEXT:    ret <4 x i64> [[LD]]
 ;
@@ -270,7 +270,7 @@ define void @mstore_v4f64(ptr %f, <4 x double> %v) {
 
 define void @mstore_v4f64_cmp(ptr %f, <4 x i32> %src, <4 x double> %v) {
 ; CHECK-LABEL: @mstore_v4f64_cmp(
-; CHECK-NEXT:    [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp sgt <4 x i32> [[SRC:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll
index 04bba79aada0ab..76e2f9af2ed1de 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-movmsk.ll
@@ -261,7 +261,7 @@ define i32 @zero_x86_avx2_pmovmskb() {
 
 define i32 @fold_x86_mmx_pmovmskb() {
 ; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> <i64 18084223940296448>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(<1 x i64> splat (i64 18084223940296448))
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to <1 x i64>
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll
index 80d3c42341f820..394754b7b54428 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq-inseltpoison.ll
@@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    ret <4 x i64> [[TMP7]]
 ;
@@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP7]]
 ;
@@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    ret <2 x i64> [[TMP9]]
 ;
@@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
 ; CHECK-NEXT:    ret <4 x i64> [[TMP10]]
@@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
 ; CHECK-NEXT:    ret <8 x i64> [[TMP10]]
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll
index 5504fa15dc3026..0b0b2349bb061d 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-muldq.ll
@@ -182,8 +182,8 @@ define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    ret <4 x i64> [[TMP7]]
 ;
@@ -199,8 +199,8 @@ define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i64> [[TMP3]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[TMP4]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    ret <8 x i64> [[TMP7]]
 ;
@@ -216,10 +216,10 @@ define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], <i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    ret <2 x i64> [[TMP9]]
 ;
@@ -235,10 +235,10 @@ define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
 ; CHECK-NEXT:    ret <4 x i64> [[TMP10]]
@@ -256,10 +256,10 @@ define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32)
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
 ; CHECK-NEXT:    ret <8 x i64> [[TMP10]]
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll
index 74d58bae742746..43e5f0cbefc9be 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pack-inseltpoison.ll
@@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 
 define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 
 define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 
 define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 
 define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 
 define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
@@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 
 define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
@@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 
 define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
@@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 
 define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
@@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 
 define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
@@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 
 define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
@@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 
 define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
@@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 
 define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
index 6ddcb856692b0f..15439ebfa2151d 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll
@@ -359,8 +359,8 @@ define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 
 define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -372,8 +372,8 @@ define <8 x i16> @trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 
 define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 ;
@@ -385,8 +385,8 @@ define <8 x i16> @trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
 
 define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -398,8 +398,8 @@ define <16 x i8> @trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 
 define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
 ;
@@ -411,8 +411,8 @@ define <16 x i8> @trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
 
 define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <8 x i32> [[A1:%.*]], splat (i32 23)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
@@ -424,8 +424,8 @@ define <16 x i16> @trunc_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 
 define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[TMP1]], <8 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
 ;
@@ -437,8 +437,8 @@ define <16 x i16> @trunc_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
 
 define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
@@ -450,8 +450,8 @@ define <32 x i8> @trunc_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 
 define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> [[TMP1]], <16 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
 ;
@@ -463,8 +463,8 @@ define <32 x i8> @trunc_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
 
 define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packssdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], <i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23, i32 23>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i32> [[A1:%.*]], splat (i32 23)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
@@ -476,8 +476,8 @@ define <32 x i16> @trunc_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 
 define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK-LABEL: @trunc_packusdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[A0:%.*]], splat (i32 17)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <16 x i32> [[A1:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP1]], <16 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
 ;
@@ -489,8 +489,8 @@ define <32 x i16> @trunc_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
 
 define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packsswb_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
@@ -502,8 +502,8 @@ define <64 x i8> @trunc_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 
 define <64 x i8> @trunc_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK-LABEL: @trunc_packuswb_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[A0:%.*]], splat (i16 15)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <32 x i16> [[A1:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[TMP1]], <32 x i16> [[TMP2]])
 ; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
index 947c7d38d26ee0..4d4d619ea7cb41 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulh.ll
@@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
@@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
@@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[A0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll
index 40ad11a6993915..3b14185edde71c 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-pmulhrs.ll
@@ -111,7 +111,7 @@ define <32 x i16> @zero_pmulh_512_commute(<32 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> [[A0:%.*]], <8 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -120,7 +120,7 @@ define <8 x i16> @one_pmulh_128(<8 x i16> %a0) {
 
 define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_128_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> splat (i16 1), <8 x i16> [[A0:%.*]])
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <8 x i16> %a0)
@@ -129,7 +129,7 @@ define <8 x i16> @one_pmulh_128_commute(<8 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> [[A0:%.*]], <16 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -138,7 +138,7 @@ define <16 x i16> @one_pmulh_256(<16 x i16> %a0) {
 
 define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_256_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> splat (i16 1), <16 x i16> [[A0:%.*]])
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <16 x i16> %a0)
@@ -147,7 +147,7 @@ define <16 x i16> @one_pmulh_256_commute(<16 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> [[A0:%.*]], <32 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %a0, <32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -156,7 +156,7 @@ define <32 x i16> @one_pmulh_512(<32 x i16> %a0) {
 
 define <32 x i16> @one_pmulh_512_commute(<32 x i16> %a0) {
 ; CHECK-LABEL: @one_pmulh_512_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> [[A0:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> splat (i16 1), <32 x i16> [[A0:%.*]])
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, <32 x i16> %a0)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll
index 951ed6d3d108ab..2376d18748ee13 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-ternlog.ll
@@ -144,7 +144,7 @@ define <8 x i32> @vpternlog_d_v256_imm14(<8 x i32> %v0, <8 x i32> %v1, <8 x i32>
 
 define <8 x i64> @vpternlog_q_v512_imm15(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) {
 ; CHECK-LABEL: @vpternlog_q_v512_imm15(
-; CHECK-NEXT:    [[R:%.*]] = xor <8 x i64> [[V0:%.*]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[R:%.*]] = xor <8 x i64> [[V0:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    ret <8 x i64> [[R]]
 ;
   %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 15)
@@ -468,7 +468,7 @@ define <8 x i32> @vpternlog_d_v256_imm50(<8 x i32> %v0, <8 x i32> %v1, <8 x i32>
 
 define <8 x i64> @vpternlog_q_v512_imm51(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) {
 ; CHECK-LABEL: @vpternlog_q_v512_imm51(
-; CHECK-NEXT:    [[R:%.*]] = xor <8 x i64> [[V1:%.*]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[R:%.*]] = xor <8 x i64> [[V1:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    ret <8 x i64> [[R]]
 ;
   %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 51)
@@ -774,7 +774,7 @@ define <16 x i32> @vpternlog_d_v512_imm84(<16 x i32> %v0, <16 x i32> %v1, <16 x
 
 define <2 x i64> @vpternlog_q_v128_imm85(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2) {
 ; CHECK-LABEL: @vpternlog_q_v128_imm85(
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i64> [[V2:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i64> [[V2:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, i32 85)
@@ -2301,7 +2301,7 @@ define <8 x i32> @vpternlog_d_v256_imm254(<8 x i32> %v0, <8 x i32> %v1, <8 x i32
 
 define <8 x i64> @vpternlog_q_v512_imm255(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2) {
 ; CHECK-LABEL: @vpternlog_q_v512_imm255(
-; CHECK-NEXT:    ret <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    ret <8 x i64> splat (i64 -1)
 ;
   %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %v0, <8 x i64> %v1, <8 x i64> %v2, i32 255)
   ret <8 x i64> %r
@@ -2797,7 +2797,7 @@ define <16 x i32> @vpternlog_d_constv512_imm60() {
 
 define <2 x i64> @vpternlog_q_constv128_imm61() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm61(
-; CHECK-NEXT:    ret <2 x i64> <i64 -5, i64 -5>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -5)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 2, i64 9>, <2 x i64> <i64 1, i64 0>, <2 x i64> <i64 5, i64 4>, i32 61)
   ret <2 x i64> %r
@@ -3037,7 +3037,7 @@ define <16 x i32> @vpternlog_d_constv512_imm90() {
 
 define <2 x i64> @vpternlog_q_constv128_imm91() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm91(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 0, i64 5>, <2 x i64> <i64 0, i64 2>, <2 x i64> <i64 4, i64 2>, i32 91)
   ret <2 x i64> %r
@@ -3085,7 +3085,7 @@ define <16 x i32> @vpternlog_d_constv512_imm96() {
 
 define <2 x i64> @vpternlog_q_constv128_imm97() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm97(
-; CHECK-NEXT:    ret <2 x i64> <i64 -14, i64 -14>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -14)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 3, i64 3>, <2 x i64> <i64 8, i64 6>, <2 x i64> <i64 6, i64 8>, i32 97)
   ret <2 x i64> %r
@@ -3133,7 +3133,7 @@ define <16 x i32> @vpternlog_d_constv512_imm102() {
 
 define <2 x i64> @vpternlog_q_constv128_imm103() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm103(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 1, i64 2>, <2 x i64> <i64 2, i64 2>, <2 x i64> <i64 5, i64 1>, i32 103)
   ret <2 x i64> %r
@@ -3325,7 +3325,7 @@ define <16 x i32> @vpternlog_d_constv512_imm126() {
 
 define <2 x i64> @vpternlog_q_constv128_imm127() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm127(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 3, i64 0>, <2 x i64> <i64 1, i64 3>, <2 x i64> <i64 0, i64 7>, i32 127)
   ret <2 x i64> %r
@@ -3373,7 +3373,7 @@ define <16 x i32> @vpternlog_d_constv512_imm132() {
 
 define <2 x i64> @vpternlog_q_constv128_imm133() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm133(
-; CHECK-NEXT:    ret <2 x i64> <i64 -14, i64 -14>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -14)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 9, i64 8>, <2 x i64> <i64 2, i64 8>, <2 x i64> <i64 5, i64 5>, i32 133)
   ret <2 x i64> %r
@@ -3565,7 +3565,7 @@ define <16 x i32> @vpternlog_d_constv512_imm156() {
 
 define <2 x i64> @vpternlog_q_constv128_imm157() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm157(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 2, i64 1>, <2 x i64> <i64 3, i64 7>, <2 x i64> <i64 3, i64 3>, i32 157)
   ret <2 x i64> %r
@@ -3837,7 +3837,7 @@ define <4 x i32> @vpternlog_d_constv128_imm190() {
 
 define <4 x i64> @vpternlog_q_constv256_imm191() {
 ; CHECK-LABEL: @vpternlog_q_constv256_imm191(
-; CHECK-NEXT:    ret <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    ret <4 x i64> splat (i64 -1)
 ;
   %r = tail call <4 x i64> @llvm.x86.avx512.pternlog.q.256(<4 x i64> <i64 5, i64 0, i64 6, i64 0>, <4 x i64> <i64 9, i64 8, i64 4, i64 0>, <4 x i64> <i64 3, i64 9, i64 7, i64 2>, i32 191)
   ret <4 x i64> %r
@@ -4093,7 +4093,7 @@ define <16 x i32> @vpternlog_d_constv512_imm222() {
 
 define <2 x i64> @vpternlog_q_constv128_imm223() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm223(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 4, i64 3>, <2 x i64> <i64 7, i64 9>, <2 x i64> <i64 5, i64 0>, i32 223)
   ret <2 x i64> %r
@@ -4285,7 +4285,7 @@ define <16 x i32> @vpternlog_d_constv512_imm246() {
 
 define <2 x i64> @vpternlog_q_constv128_imm247() {
 ; CHECK-LABEL: @vpternlog_q_constv128_imm247(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %r = tail call <2 x i64> @llvm.x86.avx512.pternlog.q.128(<2 x i64> <i64 2, i64 6>, <2 x i64> <i64 7, i64 7>, <2 x i64> <i64 0, i64 8>, i32 247)
   ret <2 x i64> %r
@@ -4349,7 +4349,7 @@ define <8 x i32> @vpternlog_d_constv256_imm254() {
 
 define <8 x i64> @vpternlog_q_constv512_imm255() {
 ; CHECK-LABEL: @vpternlog_q_constv512_imm255(
-; CHECK-NEXT:    ret <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    ret <8 x i64> splat (i64 -1)
 ;
   %r = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> <i64 2, i64 3, i64 6, i64 0, i64 0, i64 6, i64 9, i64 3>, <8 x i64> <i64 4, i64 2, i64 1, i64 5, i64 3, i64 1, i64 5, i64 0>, <8 x i64> <i64 1, i64 5, i64 4, i64 4, i64 9, i64 7, i64 2, i64 5>, i32 255)
   ret <8 x i64> %r
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll
index 5b6ce56b4b3265..5a74511139c117 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts-inseltpoison.ll
@@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
@@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
@@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
@@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
@@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
@@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
@@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
@@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
@@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
@@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
@@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
@@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
@@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrai_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
@@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrai_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
@@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrai_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
@@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrai_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
@@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
@@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
@@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
@@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
@@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
@@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
@@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
@@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
@@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
@@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
@@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
@@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
@@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
@@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
@@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
@@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
@@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
@@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_pslli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
@@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_pslli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
@@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_pslli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
@@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrl_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrl_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrl_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psll_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psll_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psll_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -2911,7 +2911,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
 
 define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
 ; CHECK-LABEL: @avx2_psrav_d_128_masked(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
index b1e5fa4f9e1c94..d38e22a4e3851b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
@@ -16,7 +16,7 @@ define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
@@ -25,7 +25,7 @@ define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
@@ -42,7 +42,7 @@ define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
@@ -51,7 +51,7 @@ define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
@@ -68,7 +68,7 @@ define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
@@ -77,7 +77,7 @@ define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
@@ -94,7 +94,7 @@ define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
@@ -103,7 +103,7 @@ define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
@@ -120,7 +120,7 @@ define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
@@ -129,7 +129,7 @@ define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
@@ -146,7 +146,7 @@ define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
@@ -155,7 +155,7 @@ define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
@@ -172,7 +172,7 @@ define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrai_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
@@ -181,7 +181,7 @@ define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrai_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
@@ -198,7 +198,7 @@ define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrai_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
@@ -207,7 +207,7 @@ define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrai_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
@@ -224,7 +224,7 @@ define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
@@ -233,7 +233,7 @@ define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrai_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
@@ -254,7 +254,7 @@ define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
@@ -279,7 +279,7 @@ define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
@@ -304,7 +304,7 @@ define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
@@ -329,7 +329,7 @@ define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
@@ -354,7 +354,7 @@ define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
@@ -379,7 +379,7 @@ define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
@@ -404,7 +404,7 @@ define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
@@ -429,7 +429,7 @@ define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
@@ -454,7 +454,7 @@ define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
@@ -483,7 +483,7 @@ define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
@@ -508,7 +508,7 @@ define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
@@ -533,7 +533,7 @@ define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
@@ -558,7 +558,7 @@ define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
@@ -583,7 +583,7 @@ define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
@@ -608,7 +608,7 @@ define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
@@ -633,7 +633,7 @@ define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_pslli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
@@ -658,7 +658,7 @@ define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_pslli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
@@ -683,7 +683,7 @@ define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_pslli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
@@ -712,7 +712,7 @@ define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -721,7 +721,7 @@ define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -730,7 +730,7 @@ define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -747,7 +747,7 @@ define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -756,7 +756,7 @@ define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -765,7 +765,7 @@ define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -782,7 +782,7 @@ define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -791,7 +791,7 @@ define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -800,7 +800,7 @@ define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -817,7 +817,7 @@ define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -826,7 +826,7 @@ define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -835,7 +835,7 @@ define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -852,7 +852,7 @@ define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -861,7 +861,7 @@ define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
 
 define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -878,7 +878,7 @@ define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -887,7 +887,7 @@ define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
 
 define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -904,7 +904,7 @@ define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -913,7 +913,7 @@ define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
@@ -922,7 +922,7 @@ define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psra_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -939,7 +939,7 @@ define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -948,7 +948,7 @@ define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
@@ -957,7 +957,7 @@ define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psra_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
@@ -974,7 +974,7 @@ define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -983,7 +983,7 @@ define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psra_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> [[V:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
@@ -1004,7 +1004,7 @@ define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1037,7 +1037,7 @@ define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1070,7 +1070,7 @@ define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1095,7 +1095,7 @@ define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1128,7 +1128,7 @@ define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1161,7 +1161,7 @@ define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1186,7 +1186,7 @@ define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psrl_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1219,7 +1219,7 @@ define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psrl_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1252,7 +1252,7 @@ define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psrl_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1281,7 +1281,7 @@ define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
 
 define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
 ; CHECK-LABEL: @sse2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 ;
   %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1314,7 +1314,7 @@ define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
 
 define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
 ; CHECK-LABEL: @sse2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1347,7 +1347,7 @@ define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
 
 define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
 ; CHECK-LABEL: @sse2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], <i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
   %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1372,7 +1372,7 @@ define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
 
 define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
 ; CHECK-LABEL: @avx2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
 ;
   %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1405,7 +1405,7 @@ define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
 
 define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
 ; CHECK-LABEL: @avx2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
   %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1438,7 +1438,7 @@ define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
 
 define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
 ; CHECK-LABEL: @avx2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
   %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -1463,7 +1463,7 @@ define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
 
 define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
 ; CHECK-LABEL: @avx512_psll_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> [[V:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
 ;
   %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
@@ -1496,7 +1496,7 @@ define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
 
 define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
 ; CHECK-LABEL: @avx512_psll_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> [[V:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
 ;
   %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
@@ -1529,7 +1529,7 @@ define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
 
 define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
 ; CHECK-LABEL: @avx512_psll_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> [[V:%.*]], splat (i64 15)
 ; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
 ;
   %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
@@ -2951,7 +2951,7 @@ define <16 x i32> @avx512_pslli_d_512_masked(<16 x i32> %v, i32 %a) {
 
 define <4 x i32> @avx2_psrav_d_128_masked(<4 x i32> %v, <4 x i32> %a) {
 ; CHECK-LABEL: @avx2_psrav_d_128_masked(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[V:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll
index ec010b02c86a02..b95f00a1e9df03 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-xop-inseltpoison.ll
@@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
 
 define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: @cmp_strue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
 ;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
@@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 
 define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: @cmp_utrue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
 ;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll
index 644cbc8824f3b3..81db68145638c1 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-xop.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-xop.ll
@@ -199,7 +199,7 @@ define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
 
 define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: @cmp_strue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
 ;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
@@ -207,7 +207,7 @@ define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 
 define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: @cmp_utrue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <16 x i8> splat (i8 -1)
 ;
   %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
   ret <16 x i8> %1
diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll
index 63287e59f66346..7037647d116ba2 100644
--- a/llvm/test/Transforms/InstCombine/abs-1.ll
+++ b/llvm/test/Transforms/InstCombine/abs-1.ll
@@ -385,7 +385,7 @@ define <2 x i8> @shifty_abs_commute1(<2 x i8> %x) {
 
 define <2 x i8> @shifty_abs_commute2(<2 x i8> %x) {
 ; CHECK-LABEL: @shifty_abs_commute2(
-; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[Y]], i1 false)
 ; CHECK-NEXT:    ret <2 x i8> [[ABS]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
index f7c639f1d8e6ba..9b01da1685fe83 100644
--- a/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/abs-intrinsic.ll
@@ -178,9 +178,9 @@ define i32 @abs_trailing_zeros_negative(i32 %x) {
 
 define <4 x i32> @abs_trailing_zeros_negative_vec(<4 x i32> %x) {
 ; CHECK-LABEL: @abs_trailing_zeros_negative_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[X:%.*]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 -2)
 ; CHECK-NEXT:    [[ABS:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[AND]], i1 false)
-; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[ABS]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[ABS]], splat (i32 -4)
 ; CHECK-NEXT:    ret <4 x i32> [[AND2]]
 ;
   %and = and <4 x i32> %x, <i32 -2, i32 -2, i32 -2, i32 -2>
diff --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
index b72f051a0b799b..cb23763249d636 100644
--- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll
+++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
@@ -78,8 +78,8 @@ define i32 @dec_mask_multiuse_neg_i32(i32 %X) {
 
 define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1)
 ; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
@@ -91,8 +91,8 @@ define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) {
 
 define <2 x i32> @dec_mask_neg_v2i32_poison(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]], splat (i32 -1)
 ; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
@@ -106,7 +106,7 @@ define <2 x i32> @dec_mask_multiuse_neg_multiuse_v2i32(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_multiuse_neg_multiuse_v2i32(
 ; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
 ; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[X]], [[NEG]]
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]], splat (i32 -1)
 ; CHECK-NEXT:    call void @usev(<2 x i32> [[NEG]])
 ; CHECK-NEXT:    call void @usev(<2 x i32> [[MASK]])
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
diff --git a/llvm/test/Transforms/InstCombine/add-mask.ll b/llvm/test/Transforms/InstCombine/add-mask.ll
index 00c14e351642e5..6cbb3442d34e09 100644
--- a/llvm/test/Transforms/InstCombine/add-mask.ll
+++ b/llvm/test/Transforms/InstCombine/add-mask.ll
@@ -32,7 +32,7 @@ define i32 @add_mask_sign_commute_i32(i32 %x) {
 define <2 x i32> @add_mask_sign_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @add_mask_sign_v2i32(
 ; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> <i32 7, i32 7>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[ISNEG]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = ashr <2 x i32> %x, <i32 31, i32 31>
diff --git a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll
index 82224496d90ec7..84462f9a7f592b 100644
--- a/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll
+++ b/llvm/test/Transforms/InstCombine/add-shl-sdiv-to-srem.ll
@@ -38,7 +38,7 @@ define i32 @add-shl-sdiv-scalar2(i32 %x) {
 
 define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-splat0(
-; CHECK-NEXT:    [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RZ:%.*]] = srem <3 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i8> [[RZ]]
 ;
   %sd = sdiv <3 x i8> %x, <i8 -4, i8 -4, i8 -4>
@@ -49,7 +49,7 @@ define <3 x i8> @add-shl-sdiv-splat0(<3 x i8> %x) {
 
 define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-splat1(
-; CHECK-NEXT:    [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824>
+; CHECK-NEXT:    [[RZ:%.*]] = srem <4 x i32> [[X:%.*]], splat (i32 1073741824)
 ; CHECK-NEXT:    ret <4 x i32> [[RZ]]
 ;
   %sd = sdiv <4 x i32> %x, <i32 -1073741824, i32 -1073741824, i32 -1073741824, i32 -1073741824>
@@ -60,7 +60,7 @@ define <4 x i32> @add-shl-sdiv-splat1(<4 x i32> %x) {
 
 define <2 x i64> @add-shl-sdiv-splat2(<2 x i64> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-splat2(
-; CHECK-NEXT:    [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], <i64 32, i64 32>
+; CHECK-NEXT:    [[RZ:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[RZ]]
 ;
   %sd = sdiv <2 x i64> %x, <i64 -32, i64 -32>
@@ -134,9 +134,9 @@ define i32 @add-shl-sdiv-i32-use3(i32 %x) {
 declare void @use3xi8(<3 x i8>)
 define <3 x i8> @add-shl-sdiv-use4(<3 x i8> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-use4(
-; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4)
 ; CHECK-NEXT:    call void @use3xi8(<3 x i8> [[SD]])
-; CHECK-NEXT:    [[RZ:%.*]] = srem <3 x i8> [[X]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RZ:%.*]] = srem <3 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i8> [[RZ]]
 ;
   %sd = sdiv <3 x i8> %x, <i8 -4, i8 -4, i8 -4>
@@ -187,8 +187,8 @@ define i32 @add-shl-sdiv-negative2(i32 %x) {
 
 define <3 x i8> @add-shl-sdiv-negative3(<3 x i8> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-negative3(
-; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 -5, i8 -5, i8 -5>
-; CHECK-NEXT:    [[SL:%.*]] = shl <3 x i8> [[SD]], <i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -5)
+; CHECK-NEXT:    [[SL:%.*]] = shl <3 x i8> [[SD]], splat (i8 2)
 ; CHECK-NEXT:    [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]]
 ; CHECK-NEXT:    ret <3 x i8> [[RZ]]
 ;
@@ -222,7 +222,7 @@ define <3 x i8> @add-shl-sdiv-3xi8-undef0(<3 x i8> %x) {
 
 define <3 x i8> @add-shl-sdiv-3xi8-undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-3xi8-undef1(
-; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4)
 ; CHECK-NEXT:    [[SL:%.*]] = shl <3 x i8> [[SD]], <i8 2, i8 undef, i8 2>
 ; CHECK-NEXT:    [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]]
 ; CHECK-NEXT:    ret <3 x i8> [[RZ]]
@@ -250,7 +250,7 @@ define <2 x i64> @add-shl-sdiv-nonsplat0(<2 x i64> %x) {
 
 define <3 x i8> @add-shl-sdiv-nonsplat1(<3 x i8> %x) {
 ; CHECK-LABEL: @add-shl-sdiv-nonsplat1(
-; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[SD:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 -4)
 ; CHECK-NEXT:    [[SL:%.*]] = shl <3 x i8> [[SD]], <i8 2, i8 2, i8 3>
 ; CHECK-NEXT:    [[RZ:%.*]] = add <3 x i8> [[SL]], [[X]]
 ; CHECK-NEXT:    ret <3 x i8> [[RZ]]
diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll
index f1afcaf5f85d2a..fae1365dfa8533 100644
--- a/llvm/test/Transforms/InstCombine/add-sitofp.ll
+++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll
@@ -101,8 +101,8 @@ define float @test_3(i32 %a, i32 %b) {
 
 define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @test_4(
-; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
-; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823)
+; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823)
 ; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]]
 ; CHECK-NEXT:    [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double>
 ; CHECK-NEXT:    ret <4 x double> [[RES]]
@@ -120,8 +120,8 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) {
 
 define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @test_4_neg(
-; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
-; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 1073741823)
+; CHECK-NEXT:    [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], splat (i32 1073741823)
 ; CHECK-NEXT:    [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to <4 x float>
 ; CHECK-NEXT:    [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x float>
 ; CHECK-NEXT:    [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]]
diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll
index 417c3a950d7805..8e5dcab3a683ea 100644
--- a/llvm/test/Transforms/InstCombine/add.ll
+++ b/llvm/test/Transforms/InstCombine/add.ll
@@ -62,8 +62,8 @@ define i32 @flip_and_mask(i32 %x) {
 
 define <2 x i8> @flip_and_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @flip_and_mask_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[INC:%.*]] = xor <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[INC:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[INC]]
 ;
   %shl = shl <2 x i8> %x, <i8 7, i8 7>
@@ -291,7 +291,7 @@ define i1 @test11(i8 %A) {
 
 define <2 x i1> @test11vec(<2 x i8> %a) {
 ; CHECK-LABEL: @test11vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add <2 x i8> %a, <i8 -1, i8 -1>
@@ -312,7 +312,7 @@ define i8 @reassoc_shl1(i8 %x, i8 %y) {
 
 define <2 x i8> @reassoc_shl1_commute1(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @reassoc_shl1_commute1(
-; CHECK-NEXT:    [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[REASS_ADD]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -521,7 +521,7 @@ define i32 @test19(i1 %C) {
 
 define <2 x i32> @test19vec(i1 %C) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133)
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -554,7 +554,7 @@ define i32 @xor_sign_bit(i32 %x) {
 
 define <2 x i32> @xor_sign_bit_vec_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @xor_sign_bit_vec_splat(
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[X:%.*]], <i32 -2147483606, i32 -2147483606>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -2147483606)
 ; CHECK-NEXT:    ret <2 x i32> [[ADD]]
 ;
   %xor = xor <2 x i32> %x, <i32 2147483648, i32 2147483648>
@@ -684,7 +684,7 @@ define i1 @test21(i32 %x) {
 
 define <2 x i1> @test21vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test21vec(
-; CHECK-NEXT:    [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 119, i32 119>
+; CHECK-NEXT:    [[Y:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 119)
 ; CHECK-NEXT:    ret <2 x i1> [[Y]]
 ;
   %t = add <2 x i32> %x, <i32 4, i32 4>
@@ -882,8 +882,8 @@ define i8 @masked_add(i8 %x) {
 
 define <2 x i8> @masked_add_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @masked_add_splat(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[AND]], <i8 64, i8 64>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -64)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[AND]], splat (i8 64)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %and = and <2 x i8> %x, <i8 192, i8 192> ; 0xc0
@@ -1008,7 +1008,7 @@ define i64 @test41_multiuse_constants_cancel(i32 %a) {
 
 define <2 x i64> @test41vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test41vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 15, i32 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
@@ -1020,10 +1020,10 @@ define <2 x i64> @test41vec(<2 x i32> %a) {
 
 define <2 x i64> @test41vec_and_multiuse(<2 x i32> %a) {
 ; CHECK-LABEL: @test41vec_and_multiuse(
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw <2 x i32> [[A:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext <2 x i32> [[ADD]] to <2 x i64>
-; CHECK-NEXT:    [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], <i64 1, i64 1>
-; CHECK-NEXT:    [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[REASS_ADD:%.*]] = shl nuw nsw <2 x i64> [[ZEXT]], splat (i64 1)
+; CHECK-NEXT:    [[EXTRAUSE:%.*]] = add nsw <2 x i64> [[REASS_ADD]], splat (i64 -1)
 ; CHECK-NEXT:    ret <2 x i64> [[EXTRAUSE]]
 ;
   %add = add nuw <2 x i32> %a, <i32 16, i32 16>
@@ -1045,7 +1045,7 @@ define i32 @test42(i1 %C) {
 
 define <2 x i32> @test42vec(i1 %C) {
 ; CHECK-LABEL: @test42vec(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1123, i32 1123>, <2 x i32> <i32 133, i32 133>
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1123), <2 x i32> splat (i32 133)
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -1092,7 +1092,7 @@ define <2 x i32> @test43vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1123, i32 1123>, [[ENTRY:%.*]] ], [ <i32 133, i32 133>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ splat (i32 1123), [[ENTRY:%.*]] ], [ splat (i32 133), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
 entry:
@@ -1230,7 +1230,7 @@ define i32 @test44_non_matching(i32 %A) {
 
 define <2 x i32> @test44_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test44_vec(
-; CHECK-NEXT:    [[C:%.*]] = and <2 x i32> [[A:%.*]], <i32 -124, i32 -124>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -124)
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = or <2 x i32> %A, <i32 123, i32 123>
@@ -1240,8 +1240,8 @@ define <2 x i32> @test44_vec(<2 x i32> %A) {
 
 define <2 x i32> @test44_vec_non_matching(<2 x i32> %A) {
 ; CHECK-LABEL: @test44_vec_non_matching(
-; CHECK-NEXT:    [[B:%.*]] = or <2 x i32> [[A:%.*]], <i32 123, i32 123>
-; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], <i32 -321, i32 -321>
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 123)
+; CHECK-NEXT:    [[C:%.*]] = add <2 x i32> [[B]], splat (i32 -321)
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = or <2 x i32> %A, <i32 123, i32 123>
@@ -1994,7 +1994,7 @@ define i8 @mul_add_common_factor_commute1(i8 %x, i8 %y) {
 
 define <2 x i8> @mul_add_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @mul_add_common_factor_commute2(
-; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[A:%.*]] = mul nuw <2 x i8> [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
@@ -2061,8 +2061,8 @@ define i8 @not_mul(i8 %x) {
 define <2 x i8> @not_mul_commute(<2 x i8> %p) {
 ; CHECK-LABEL: @not_mul_commute(
 ; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i8> [[X]], <i8 43, i8 43>
-; CHECK-NEXT:    [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i8> [[X]], splat (i8 43)
+; CHECK-NEXT:    [[PLUSX:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i8> [[PLUSX]]
 ;
   %x = mul <2 x i8> %p, %p ; thwart complexity-based canonicalization
@@ -2520,7 +2520,7 @@ define i8 @mul_negpow2(i8 %x, i8 %y) {
 define <2 x i8> @mul_negpow2_commute_vec(<2 x i8> %x, <2 x i8> %p) {
 ; CHECK-LABEL: @mul_negpow2_commute_vec(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[A:%.*]] = sub <2 x i8> [[Y]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
@@ -2886,7 +2886,7 @@ define i8 @floor_sdiv_by_2_wrong_cast(i8 %x) {
 
 define <2 x i32> @floor_sdiv_vec_commute(<2 x i32> %x) {
 ; CHECK-LABEL: @floor_sdiv_vec_commute(
-; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %d = sdiv <2 x i32> %x, <i32 4, i32 4>
@@ -3190,7 +3190,7 @@ define i32 @dec_zext_add_nonzero(i8 %x) {
 
 define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @dec_zext_add_nonzero_vec(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 8, i8 8>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8)
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
@@ -3206,9 +3206,9 @@ define <2 x i32> @dec_zext_add_nonzero_vec(<2 x i8> %x) {
 define <2 x i32> @dec_zext_add_nonzero_vec_undef0(<2 x i8> %x) {
 ; CHECK-LABEL: @dec_zext_add_nonzero_vec_undef0(
 ; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 8, i8 undef>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[O]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[O]], splat (i8 -1)
 ; CHECK-NEXT:    [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = add nuw nsw <2 x i32> [[B]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %o = or <2 x i8> %x, <i8 8, i8 undef>
@@ -3233,7 +3233,7 @@ define <2 x i32> @dec_zext_add_nonzero_poison0(<2 x i8> %x) {
 
 define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) {
 ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison1(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 8, i8 8>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8)
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i8> [[O]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
@@ -3246,8 +3246,8 @@ define <2 x i32> @dec_zext_add_nonzero_vec_poison1(<2 x i8> %x) {
 
 define <2 x i32> @dec_zext_add_nonzero_vec_poison2(<2 x i8> %x) {
 ; CHECK-LABEL: @dec_zext_add_nonzero_vec_poison2(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 8, i8 8>
-; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i8> [[O]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 8)
+; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i8> [[O]], splat (i8 -1)
 ; CHECK-NEXT:    [[B:%.*]] = zext <2 x i8> [[A]] to <2 x i32>
 ; CHECK-NEXT:    [[C:%.*]] = add nuw nsw <2 x i32> [[B]], <i32 1, i32 poison>
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll
index 77f7fc7b35cd44..0e97deb4d98ade 100644
--- a/llvm/test/Transforms/InstCombine/add4.ll
+++ b/llvm/test/Transforms/InstCombine/add4.ll
@@ -19,7 +19,7 @@ define i64 @match_unsigned(i64 %x) {
 define <2 x i64> @match_unsigned_vector(<2 x i64> %x) {
 ; CHECK-LABEL: @match_unsigned_vector(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], <i64 19136, i64 19136>
+; CHECK-NEXT:    [[UREM:%.*]] = urem <2 x i64> [[X:%.*]], splat (i64 19136)
 ; CHECK-NEXT:    ret <2 x i64> [[UREM]]
 ;
 bb:
@@ -63,7 +63,7 @@ define i64 @match_signed(i64 %x) {
 define <2 x i64> @match_signed_vector(<2 x i64> %x) {
 ; CHECK-LABEL: @match_signed_vector(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], <i64 172224, i64 172224>
+; CHECK-NEXT:    [[SREM1:%.*]] = srem <2 x i64> [[X:%.*]], splat (i64 172224)
 ; CHECK-NEXT:    ret <2 x i64> [[SREM1]]
 ;
 bb:
diff --git a/llvm/test/Transforms/InstCombine/add_or_sub.ll b/llvm/test/Transforms/InstCombine/add_or_sub.ll
index ef44f036b71fa4..7d005d3cb16f14 100644
--- a/llvm/test/Transforms/InstCombine/add_or_sub.ll
+++ b/llvm/test/Transforms/InstCombine/add_or_sub.ll
@@ -58,7 +58,7 @@ define i64 @add_or_sub_comb_i64_commuted4(i64 %p) {
 define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) {
 ; CHECK-LABEL: @add_or_sub_comb_i32vec(
 ; CHECK-NEXT:    [[X:%.*]] = mul <3 x i32> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add <3 x i32> [[X]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <3 x i32> [[X]], splat (i32 -1)
 ; CHECK-NEXT:    [[ADD:%.*]] = and <3 x i32> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <3 x i32> [[ADD]]
 ;
@@ -72,7 +72,7 @@ define <3 x i32> @add_or_sub_comb_i32vec(<3 x i32> %p) {
 define <4 x i16> @add_or_sub_comb_i32vec_poison(<4 x i16> %p) {
 ; CHECK-LABEL: @add_or_sub_comb_i32vec_poison(
 ; CHECK-NEXT:    [[X:%.*]] = mul <4 x i16> [[P:%.*]], [[P]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[X]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[X]], splat (i16 -1)
 ; CHECK-NEXT:    [[ADD:%.*]] = and <4 x i16> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <4 x i16> [[ADD]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
index da92ca11a9e0d0..b38466f6cf0ec2 100644
--- a/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
+++ b/llvm/test/Transforms/InstCombine/addsub-constant-folding.ll
@@ -31,7 +31,7 @@ define i32 @add_const_add_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_add_const(
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 10)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -41,9 +41,9 @@ define <4 x i32> @vec_add_const_add_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_add_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_add_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 10)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -89,7 +89,7 @@ define i32 @add_const_sub_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_sub_const(
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -99,9 +99,9 @@ define <4 x i32> @vec_add_const_sub_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_add_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_sub_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -260,7 +260,7 @@ define i32 @add_const_const_sub_extrause(i32 %arg) {
 
 define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_const_sub(
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -270,9 +270,9 @@ define <4 x i32> @vec_add_const_const_sub(<4 x i32> %arg) {
 
 define <4 x i32> @vec_add_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_add_const_const_sub_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 -6, i32 -6, i32 -6, i32 -6>, [[ARG]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 -6), [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = add <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -318,7 +318,7 @@ define i32 @sub_const_add_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_add_const(
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -328,9 +328,9 @@ define <4 x i32> @vec_sub_const_add_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_sub_const_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_add_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -376,7 +376,7 @@ define i32 @sub_const_sub_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_sub_const(
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -10, i32 -10, i32 -10, i32 -10>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -10)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -386,9 +386,9 @@ define <4 x i32> @vec_sub_const_sub_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_sub_const_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_sub_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -10, i32 -10, i32 -10, i32 -10>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -10)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -434,7 +434,7 @@ define i32 @sub_const_const_sub_extrause(i32 %arg) {
 
 define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_const_sub(
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -444,9 +444,9 @@ define <4 x i32> @vec_sub_const_const_sub(<4 x i32> %arg) {
 
 define <4 x i32> @vec_sub_const_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_sub_const_const_sub_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -8, i32 -8, i32 -8, i32 -8>
+; CHECK-NEXT:    [[T0:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -8)
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> %arg, <i32 8, i32 8, i32 8, i32 8>
@@ -492,7 +492,7 @@ define i32 @const_sub_add_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_add_const(
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -502,9 +502,9 @@ define <4 x i32> @vec_const_sub_add_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_const_sub_add_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_add_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 10, i32 10, i32 10, i32 10>, [[ARG]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 10), [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -550,7 +550,7 @@ define i32 @const_sub_sub_const_extrause(i32 %arg) {
 
 define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_sub_const(
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 6, i32 6, i32 6, i32 6>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -560,9 +560,9 @@ define <4 x i32> @vec_const_sub_sub_const(<4 x i32> %arg) {
 
 define <4 x i32> @vec_const_sub_sub_const_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_sub_const_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> <i32 6, i32 6, i32 6, i32 6>, [[ARG]]
+; CHECK-NEXT:    [[T1:%.*]] = sub <4 x i32> splat (i32 6), [[ARG]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -609,7 +609,7 @@ define i32 @const_sub_const_sub_extrause(i32 %arg) {
 
 define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_const_sub(
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG:%.*]], splat (i32 -6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
@@ -619,9 +619,9 @@ define <4 x i32> @vec_const_sub_const_sub(<4 x i32> %arg) {
 
 define <4 x i32> @vec_const_sub_const_sub_extrause(<4 x i32> %arg) {
 ; CHECK-LABEL: @vec_const_sub_const_sub_extrause(
-; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, [[ARG:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <4 x i32> splat (i32 8), [[ARG:%.*]]
 ; CHECK-NEXT:    call void @vec_use(<4 x i32> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], <i32 -6, i32 -6, i32 -6, i32 -6>
+; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[ARG]], splat (i32 -6)
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
   %t0 = sub <4 x i32> <i32 8, i32 8, i32 8, i32 8>, %arg
diff --git a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll
index 76fc7a07be6bd6..0b4720c1a209fb 100644
--- a/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/adjust-for-minmax.ll
@@ -115,7 +115,7 @@ define i32 @umax3(i32 %n) {
 
 define <2 x i32> @umax3_vec(<2 x i32> %n) {
 ; CHECK-LABEL: @umax3_vec(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> <i32 5, i32 5>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 5))
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %t = icmp ugt <2 x i32> %n, <i32 4, i32 4>
@@ -139,7 +139,7 @@ define i32 @umin3(i32 %n) {
 
 define <2 x i32> @umin3_vec(<2 x i32> %n) {
 ; CHECK-LABEL: @umin3_vec(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> <i32 6, i32 6>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 6))
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %t = icmp ult <2 x i32> %n, <i32 7, i32 7>
@@ -211,7 +211,7 @@ define i32 @umax4(i32 %n) {
 
 define <2 x i32> @umax4_vec(<2 x i32> %n) {
 ; CHECK-LABEL: @umax4_vec(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> <i32 8, i32 8>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 8))
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %t = icmp uge <2 x i32> %n, <i32 8, i32 8>
@@ -235,7 +235,7 @@ define i32 @umin4(i32 %n) {
 
 define <2 x i32> @umin4_vec(<2 x i32> %n) {
 ; CHECK-LABEL: @umin4_vec(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> <i32 9, i32 9>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[N:%.*]], <2 x i32> splat (i32 9))
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %t = icmp ule <2 x i32> %n, <i32 9, i32 9>
@@ -305,7 +305,7 @@ define i64 @umax_sext(i32 %a) {
 
 define <2 x i64> @umax_sext_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umax_sext_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    [[MAX:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MAX]]
 ;
@@ -329,7 +329,7 @@ define i64 @umin_sext(i32 %a) {
 
 define <2 x i64> @umin_sext_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umin_sext_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2))
 ; CHECK-NEXT:    [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MIN]]
 ;
@@ -353,7 +353,7 @@ define i64 @umax_sext2(i32 %a) {
 
 define <2 x i64> @umax_sext2_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umax_sext2_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2))
 ; CHECK-NEXT:    [[MIN:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MIN]]
 ;
@@ -377,7 +377,7 @@ define i64 @umin_sext2(i32 %a) {
 
 define <2 x i64> @umin_sext2_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umin_sext2_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MIN]]
 ;
@@ -401,7 +401,7 @@ define i64 @umax_zext(i32 %a) {
 
 define <2 x i64> @umax_zext_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umax_zext_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    [[MAX:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MAX]]
 ;
@@ -425,7 +425,7 @@ define i64 @umin_zext(i32 %a) {
 
 define <2 x i64> @umin_zext_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @umin_zext_vec(
-; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> <i32 2, i32 2>)
+; CHECK-NEXT:    [[NARROW:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[A:%.*]], <2 x i32> splat (i32 2))
 ; CHECK-NEXT:    [[MIN:%.*]] = zext nneg <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MIN]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index 5a9767a64a2ced..16ea1ad93c752c 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -23,7 +23,7 @@ define i1 @test1(i32 %a, i32 %b) {
 define <2 x i1> @test1vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test1vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 65280, i32 65280>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65280)
 ; CHECK-NEXT:    [[TMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[TMP]]
 ;
@@ -46,7 +46,7 @@ define i1 @test2(i64 %A) {
 
 define <2 x i1> @test2vec(<2 x i64> %A) {
 ; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[A:%.*]], <i64 128, i64 128>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -68,7 +68,7 @@ define i1 @test3(i64 %A) {
 
 define <2 x i1> @test3vec(<2 x i64> %A) {
 ; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[A:%.*]], <i64 128, i64 128>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 128)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i64> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/and-fcmp.ll b/llvm/test/Transforms/InstCombine/and-fcmp.ll
index c163802fcc935c..c03c9d69051edd 100644
--- a/llvm/test/Transforms/InstCombine/and-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/and-fcmp.ll
@@ -4644,7 +4644,7 @@ define i1 @clang_builtin_isnormal_inf_check(half %x) {
 define <2 x i1> @clang_builtin_isnormal_inf_check_vector(<2 x half> %x) {
 ; CHECK-LABEL: @clang_builtin_isnormal_inf_check_vector(
 ; CHECK-NEXT:    [[FABS_X:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT:    [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], <half 0xH7C00, half 0xH7C00>
+; CHECK-NEXT:    [[AND:%.*]] = fcmp oeq <2 x half> [[FABS_X]], splat (half 0xH7C00)
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
 ;
   %fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll
index de5de37fe2df64..ee76e0eb105109 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmp-const-icmp.ll
@@ -139,7 +139,7 @@ define i1 @ne_multi_c2(i8 %x, i8 %y) {
 define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @eq_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -1)
 ; CHECK-NEXT:    [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
@@ -152,7 +152,7 @@ define <2 x i1> @eq_vector(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @ne_vector_equal_5(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_5(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], <i8 -6, i8 -6>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6)
 ; CHECK-NEXT:    [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]]
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
 ;
@@ -179,7 +179,7 @@ define <2 x i1> @eq_vector_equal_minus_1(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @ne_vector_equal_minus_7(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], <i8 6, i8 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 6)
 ; CHECK-NEXT:    [[AND:%.*]] = icmp ult <2 x i8> [[TMP1]], [[Y]]
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
 ;
@@ -193,8 +193,8 @@ define <2 x i1> @ne_vector_equal_minus_7(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @eq_vector_unequal1(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i8> [[X]], <i8 -5, i8 -5>
-; CHECK-NEXT:    [[C1:%.*]] = icmp eq <2 x i8> [[X]], <i8 2, i8 2>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i8> [[X]], splat (i8 -5)
+; CHECK-NEXT:    [[C1:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 2)
 ; CHECK-NEXT:    [[C2:%.*]] = icmp ugt <2 x i8> [[SUB]], [[Y]]
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[C1]], [[C2]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
@@ -209,8 +209,8 @@ define <2 x i1> @eq_vector_unequal1(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @ne_vector_unequal2(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], <i8 7, i8 7>
-; CHECK-NEXT:    [[C1:%.*]] = icmp ne <2 x i8> [[X]], <i8 -3, i8 -3>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 7)
+; CHECK-NEXT:    [[C1:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -3)
 ; CHECK-NEXT:    [[C2:%.*]] = icmp ule <2 x i8> [[ADD]], [[Y]]
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i1> [[C1]], [[C2]]
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
@@ -228,7 +228,7 @@ define <2 x i1> @ne_vector_unequal2(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_icmp(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], <i8 -6, i8 -6>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6)
 ; CHECK-NEXT:    [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
@@ -242,7 +242,7 @@ define <2 x i1> @eq_vector_poison_icmp(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @eq_vector_poison_add(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: define <2 x i1> @eq_vector_poison_add(
 ; CHECK-SAME: <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], <i8 -6, i8 -6>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X]], splat (i8 -6)
 ; CHECK-NEXT:    [[OR:%.*]] = icmp uge <2 x i8> [[TMP1]], [[Y]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index eb4723c86542de..732dd2f7b7e2b4 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -335,8 +335,8 @@ define i1 @and_ne_with_diff_one_signed_logical(i64 %x) {
 
 define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -33, i32 -33>
-; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 65, i32 65>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -33)
+; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 65)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %cmp1 = icmp eq <2 x i32> %x, <i32 97, i32 97>
@@ -347,8 +347,8 @@ define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) {
 
 define <2 x i1> @and_ne_with_diff_one_splatvec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_ne_with_diff_one_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -41, i32 -41>
-; CHECK-NEXT:    [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -41)
+; CHECK-NEXT:    [[AND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2)
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
 ;
   %cmp1 = icmp ne <2 x i32> %x, <i32 40, i32 40>
@@ -2154,7 +2154,7 @@ define i1 @samesign(i32 %x, i32 %y) {
 define <2 x i1> @samesign_different_sign_bittest1(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @samesign_different_sign_bittest1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %a = and <2 x i32> %x, %y
@@ -2509,7 +2509,7 @@ define i1 @samesign_inverted_wrong_cmp(i32 %x, i32 %y) {
 define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_eq_m1_and_eq_m1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %rx = icmp eq <2 x i8> %x, <i8 -1, i8 poison>
@@ -2521,7 +2521,7 @@ define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @icmp_eq_m1_and_eq_poison_m1(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_eq_m1_and_eq_poison_m1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %rx = icmp eq <2 x i8> %x, <i8 -1, i8 poison>
@@ -2571,7 +2571,7 @@ define <2 x i1> @icmp_eq_m1_or_eq_m1_fail(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @icmp_ne_m1_or_ne_m1(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_ne_m1_or_ne_m1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %rx = icmp ne <2 x i8> %x, <i8 -1, i8 -1>
@@ -2680,7 +2680,7 @@ define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2(<2 x i64> %x) {
 
 define <2 x i64> @icmp_slt_0_or_icmp_sgt_0_i64x2_fail(<2 x i64> %x) {
 ; CHECK-LABEL: @icmp_slt_0_or_icmp_sgt_0_i64x2_fail(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i64> [[X:%.*]], splat (i64 1)
 ; CHECK-NEXT:    [[E:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[E]]
 ;
@@ -2794,7 +2794,7 @@ define i64 @icmp_slt_0_and_icmp_sge_neg1_i64_fail(i64 %x) {
 
 define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg1_i32x2(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
@@ -2807,7 +2807,7 @@ define <2 x i32> @icmp_slt_0_and_icmp_sge_neg1_i32x2(<2 x i32> %x) {
 
 define <2 x i32> @icmp_slt_0_and_icmp_sge_neg2_i32x2(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_slt_0_and_icmp_sge_neg2_i32x2(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 -3, i32 -3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -3)
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
@@ -3085,7 +3085,7 @@ entry:
 define <4 x i1> @logical_and_icmps_vec1(<4 x i32> %a, <4 x i1> %other_cond) {
 ; CHECK-LABEL: @logical_and_icmps_vec1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], <i32 10086, i32 10086, i32 10086, i32 10086>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[A:%.*]], splat (i32 10086)
 ; CHECK-NEXT:    [[RET:%.*]] = select <4 x i1> [[OTHER_COND:%.*]], <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/and-or.ll b/llvm/test/Transforms/InstCombine/and-or.ll
index fee055a2e12451..a7290bc70a9944 100644
--- a/llvm/test/Transforms/InstCombine/and-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-or.ll
@@ -118,13 +118,13 @@ define i8 @or_and_or_commute1(i8 %x) {
 
 define <2 x i8> @or_and_or_commute1_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @or_and_or_commute1_splat(
-; CHECK-NEXT:    [[XN:%.*]] = or <2 x i8> [[X:%.*]], <i8 16, i8 16>
+; CHECK-NEXT:    [[XN:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 16)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[XN]])
-; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[XN]], <i8 59, i8 59>
+; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 59)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X1]])
-; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[X]], <i8 64, i8 64>
+; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X2]])
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[XN]], <i8 123, i8 123>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 123)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %xn = or <2 x i8> %x, <i8 16, i8 16>
@@ -162,14 +162,14 @@ define i8 @or_and_or_commute2(i8 %x, i8 %y) {
 
 define <2 x i8> @or_and_or_commute2_splat(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @or_and_or_commute2_splat(
-; CHECK-NEXT:    [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[N:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[XN:%.*]] = or <2 x i8> [[N]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[XN]])
-; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[XN]], <i8 -69, i8 -69>
+; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[XN]], splat (i8 -69)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X1]])
-; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[X]], <i8 64, i8 64>
+; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[X]], splat (i8 64)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X2]])
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[XN]], <i8 -5, i8 -5>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[XN]], splat (i8 -5)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %n = lshr <2 x i8> %y, <i8 6, i8 6>
@@ -234,16 +234,16 @@ define i8 @or_and2_or2(i8 %x) {
 
 define <2 x i8> @or_and2_or2_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @or_and2_or2_splat(
-; CHECK-NEXT:    [[O1:%.*]] = or <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[O1:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[O1]])
-; CHECK-NEXT:    [[O2:%.*]] = or <2 x i8> [[X]], <i8 2, i8 2>
+; CHECK-NEXT:    [[O2:%.*]] = or <2 x i8> [[X]], splat (i8 2)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[O2]])
-; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[O1]], <i8 -71, i8 -71>
+; CHECK-NEXT:    [[X1:%.*]] = and <2 x i8> [[O1]], splat (i8 -71)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X1]])
-; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[O2]], <i8 66, i8 66>
+; CHECK-NEXT:    [[X2:%.*]] = and <2 x i8> [[O2]], splat (i8 66)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[X2]])
-; CHECK-NEXT:    [[BITFIELD:%.*]] = and <2 x i8> [[X]], <i8 -8, i8 -8>
-; CHECK-NEXT:    [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], <i8 3, i8 3>
+; CHECK-NEXT:    [[BITFIELD:%.*]] = and <2 x i8> [[X]], splat (i8 -8)
+; CHECK-NEXT:    [[R:%.*]] = or disjoint <2 x i8> [[BITFIELD]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %o1 = or <2 x i8> %x, <i8 1, i8 1>
@@ -277,8 +277,8 @@ define i8 @and_or_hoist_mask(i8 %a, i8 %b) {
 
 define <2 x i8> @and_xor_hoist_mask_vec_splat(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @and_xor_hoist_mask_vec_splat(
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 6>
-; CHECK-NEXT:    [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6)
+; CHECK-NEXT:    [[B_MASKED:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[AND:%.*]] = xor <2 x i8> [[SH]], [[B_MASKED]]
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
@@ -305,9 +305,9 @@ define i8 @and_xor_hoist_mask_commute(i8 %a, i8 %b) {
 
 define <2 x i8> @and_or_hoist_mask_commute_vec_splat(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @and_or_hoist_mask_commute_vec_splat(
-; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 6>
-; CHECK-NEXT:    [[C_MASKED:%.*]] = and <2 x i8> [[C]], <i8 3, i8 3>
+; CHECK-NEXT:    [[C:%.*]] = mul <2 x i8> [[B:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6)
+; CHECK-NEXT:    [[C_MASKED:%.*]] = and <2 x i8> [[C]], splat (i8 3)
 ; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[C_MASKED]], [[SH]]
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll
index 3dbf9af7e19343..2c5e36407eb2cd 100644
--- a/llvm/test/Transforms/InstCombine/and-xor-or.ll
+++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll
@@ -66,7 +66,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
 ; CHECK-SAME: (<2 x i32> [[PA:%.*]], <2 x i32> [[PB:%.*]]) {
 ; CHECK-NEXT:    [[A:%.*]] = udiv <2 x i32> <i32 42, i32 43>, [[PA]]
 ; CHECK-NEXT:    [[B:%.*]] = udiv <2 x i32> <i32 43, i32 42>, [[PB]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[B]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[B]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[A]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -83,7 +83,7 @@ define <2 x i32> @and_xor_common_op_commute3(<2 x i32> %pa, <2 x i32> %pb) {
 define <4 x i32> @and_xor_common_op_constant(<4 x i32> %A) {
 ; CHECK-LABEL: define {{[^@]+}}@and_xor_common_op_constant
 ; CHECK-SAME: (<4 x i32> [[A:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -740,9 +740,9 @@ define i32 @not_and_and_not(i32 %a0, i32 %b, i32 %c) {
 define <4 x i64> @not_and_and_not_4i64(<4 x i64> %a0, <4 x i64> %b, <4 x i64> %c) {
 ; CHECK-LABEL: define {{[^@]+}}@not_and_and_not_4i64
 ; CHECK-SAME: (<4 x i64> [[A0:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = sdiv <4 x i64> <i64 42, i64 42, i64 42, i64 42>, [[A0]]
+; CHECK-NEXT:    [[A:%.*]] = sdiv <4 x i64> splat (i64 42), [[A0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i64> [[B]], [[C]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], <i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i64> [[TMP1]], splat (i64 -1)
 ; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i64> [[A]], [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i64> [[AND2]]
 ;
@@ -4392,8 +4392,8 @@ define i32 @canonicalize_logic_first_or0_nswnuw(i32 %x) {
 define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[TMP1]], <i32 112, i32 112>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 112)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add <2 x i32> <i32 112, i32 112>, %x ; <0x00000070, 0x00000070>
@@ -4404,8 +4404,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0(<2 x i32> %x) {
 define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nsw
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 112, i32 112>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15)
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 112)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add nsw <2 x i32> <i32 112, i32 112>, %x ; <0x00000070, 0x00000070>
@@ -4416,8 +4416,8 @@ define <2 x i32> @canonicalize_logic_first_or_vector0_nsw(<2 x i32> %x) {
 define <2 x i32> @canonicalize_logic_first_or_vector0_nswnuw(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_or_vector0_nswnuw
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 15, i32 15>
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], <i32 112, i32 112>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 15)
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 112)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add nsw nuw <2 x i32> <i32 112, i32 112>, %x ; <0x00000070, 0x00000070>
@@ -4538,8 +4538,8 @@ define i8 @canonicalize_logic_first_and0_nswnuw(i8 %x) {
 define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0
 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -10, i8 -10>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], <i8 48, i8 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 48)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add <2 x i8> <i8 48, i8 48>, %x
@@ -4550,8 +4550,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0(<2 x i8> %x) {
 define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nsw
 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -10, i8 -10>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[TMP1]], <i8 48, i8 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10)
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 48)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add nsw <2 x i8> <i8 48, i8 48>, %x
@@ -4562,8 +4562,8 @@ define <2 x i8> @canonicalize_logic_first_and_vector0_nsw(<2 x i8> %x) {
 define <2 x i8> @canonicalize_logic_first_and_vector0_nswnuw(<2 x i8> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector0_nswnuw
 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -10, i8 -10>
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], <i8 48, i8 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -10)
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 48)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add nsw nuw <2 x i8> <i8 48, i8 48>, %x
@@ -4589,7 +4589,7 @@ define <2 x i8> @canonicalize_logic_first_and_vector1(<2 x i8> %x) {
 define <2 x i32> @canonicalize_logic_first_and_vector2(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_and_vector2
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X]], <i32 612368384, i32 612368384>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X]], splat (i32 612368384)
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[A]], <i32 -65536, i32 -32768>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -4675,8 +4675,8 @@ define i8 @canonicalize_logic_first_xor_0_nswnuw(i8 %x) {
 define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], <i32 32783, i32 32783>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[TMP1]], <i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[TMP1]], splat (i32 -8388608)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add <2 x i32> <i32 -8388608, i32 -8388608>, %x ; <0xFF800000, 0xFF800000>
@@ -4687,8 +4687,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0(<2 x i32> %x) {
 define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nsw
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], <i32 32783, i32 32783>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783)
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 -8388608)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add nsw <2 x i32> <i32 -8388608, i32 -8388608>, %x ; <0xFF800000, 0xFF800000>
@@ -4699,8 +4699,8 @@ define <2 x i32> @canonicalize_logic_first_xor_vector0_nsw(<2 x i32> %x) {
 define <2 x i32> @canonicalize_logic_first_xor_vector0_nswnuw(<2 x i32> %x) {
 ; CHECK-LABEL: define {{[^@]+}}@canonicalize_logic_first_xor_vector0_nswnuw
 ; CHECK-SAME: (<2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], <i32 32783, i32 32783>
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], <i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X]], splat (i32 32783)
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i32> [[TMP1]], splat (i32 -8388608)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add nsw nuw <2 x i32> <i32 -8388608, i32 -8388608>, %x ; <0xFF800000, 0xFF800000>
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index 466718c8023007..77c446e2c5cc15 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -322,7 +322,7 @@ define i1 @test18(i32 %A) {
 
 define <2 x i1> @test18_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test18_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 127, i32 127>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 127)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = and <2 x i32> %A, <i32 -128, i32 -128>
@@ -342,7 +342,7 @@ define i1 @test18a(i8 %A) {
 
 define <2 x i1> @test18a_vec(<2 x i8> %A) {
 ; CHECK-LABEL: @test18a_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], <i8 2, i8 2>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = and <2 x i8> %A, <i8 -2, i8 -2>
@@ -396,7 +396,7 @@ define i1 @test23_logical(i32 %A) {
 
 define <2 x i1> @test23vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test23vec(
-; CHECK-NEXT:    [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[D:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %B = icmp sgt <2 x i32> %A, <i32 1, i32 1>
@@ -455,8 +455,8 @@ define i1 @test25_logical(i32 %A) {
 
 define <2 x i1> @test25vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test25vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], <i32 -50, i32 -50>
-; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 50, i32 50>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -50)
+; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 50)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %B = icmp sge <2 x i32> %A, <i32 50, i32 50>
@@ -504,9 +504,9 @@ define i32 @ashr_lowmask_use(i32 %x) {
 
 define <2 x i8> @ashr_lowmask_use_splat(<2 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @ashr_lowmask_use_splat(
-; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    store <2 x i8> [[A]], ptr [[P:%.*]], align 2
-; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i8> [[X]], <i8 7, i8 7>
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i8> [[X]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = ashr <2 x i8> %x, <i8 7, i8 7>
@@ -596,7 +596,7 @@ define i32 @test31(i1 %X) {
 
 define <2 x i32> @and_demanded_bits_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @and_demanded_bits_splat_vec(
-; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[Z:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %y = add <2 x i32> %x, <i32 8, i32 8>
@@ -658,7 +658,7 @@ define i32 @test33b(i32 %b) {
 
 define <2 x i32> @test33vec(<2 x i32> %b) {
 ; CHECK-LABEL: @test33vec(
-; CHECK-NEXT:    [[T13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T13]]
 ;
   %t4.mask = and <2 x i32> %b, <i32 1, i32 1>
@@ -670,7 +670,7 @@ define <2 x i32> @test33vec(<2 x i32> %b) {
 
 define <2 x i32> @test33vecb(<2 x i32> %b) {
 ; CHECK-LABEL: @test33vecb(
-; CHECK-NEXT:    [[T13:%.*]] = xor <2 x i32> [[B:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[T13:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T13]]
 ;
   %t4.mask = and <2 x i32> %b, <i32 1, i32 1>
@@ -716,7 +716,7 @@ define i64 @test35(i32 %X) {
 define <2 x i64> @test35_uniform(<2 x i32> %X) {
 ; CHECK-LABEL: @test35_uniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 240, i32 240>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240)
 ; CHECK-NEXT:    [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
 ;
@@ -741,8 +741,8 @@ define i64 @test36(i32 %X) {
 
 define <2 x i64> @test36_uniform(<2 x i32> %X) {
 ; CHECK-LABEL: @test36_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 7, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 240, i32 240>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240)
 ; CHECK-NEXT:    [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
 ;
@@ -780,8 +780,8 @@ define i64 @test37(i32 %X) {
 
 define <2 x i64> @test37_uniform(<2 x i32> %X) {
 ; CHECK-LABEL: @test37_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], <i32 7, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 240, i32 240>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 240)
 ; CHECK-NEXT:    [[RES:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[RES]]
 ;
@@ -988,7 +988,7 @@ define i32 @test40(i1 %C) {
 
 define <2 x i32> @test40vec(i1 %C) {
 ; CHECK-LABEL: @test40vec(
-; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 104, i32 104>, <2 x i32> <i32 10, i32 10>
+; CHECK-NEXT:    [[A:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 104), <2 x i32> splat (i32 10)
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -1035,7 +1035,7 @@ define <2 x i32> @test41vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 104, i32 104>, [[ENTRY:%.*]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ splat (i32 104), [[ENTRY:%.*]] ], [ splat (i32 10), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
 entry:
@@ -1525,10 +1525,10 @@ define i32 @not_lowmask_sext_in_reg2(i32 %x) {
 
 define <2 x i32> @lowmask_sext_in_reg_splat(<2 x i32> %x, ptr %p) {
 ; CHECK-LABEL: @lowmask_sext_in_reg_splat(
-; CHECK-NEXT:    [[L:%.*]] = shl <2 x i32> [[X:%.*]], <i32 20, i32 20>
-; CHECK-NEXT:    [[R:%.*]] = ashr exact <2 x i32> [[L]], <i32 20, i32 20>
+; CHECK-NEXT:    [[L:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 20)
+; CHECK-NEXT:    [[R:%.*]] = ashr exact <2 x i32> [[L]], splat (i32 20)
 ; CHECK-NEXT:    store <2 x i32> [[R]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X]], <i32 4095, i32 4095>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X]], splat (i32 4095)
 ; CHECK-NEXT:    ret <2 x i32> [[AND]]
 ;
   %l = shl <2 x i32> %x, <i32 20, i32 20>
@@ -1576,9 +1576,9 @@ define i8 @lowmask_add_2_uses(i8 %x) {
 
 define <2 x i8> @lowmask_add_2_splat(<2 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @lowmask_add_2_splat(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64)
 ; CHECK-NEXT:    store <2 x i8> [[A]], ptr [[P:%.*]], align 2
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X]], <i8 63, i8 63>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X]], splat (i8 63)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add <2 x i8> %x, <i8 -64, i8 -64> ; 0xc0
@@ -1619,9 +1619,9 @@ define i8 @not_lowmask_add2(i8 %x) {
 
 define <2 x i8> @lowmask_add_splat(<2 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @lowmask_add_splat(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -64)
 ; CHECK-NEXT:    store <2 x i8> [[A]], ptr [[P:%.*]], align 2
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X]], <i8 32, i8 32>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X]], splat (i8 32)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add <2 x i8> %x, <i8 -64, i8 -64> ; 0xc0
@@ -1670,8 +1670,8 @@ define i8 @flip_masked_bit(i8 %A) {
 
 define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) {
 ; CHECK-LABEL: @flip_masked_bit_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], <i8 16, i8 16>
-; CHECK-NEXT:    [[C:%.*]] = xor <2 x i8> [[TMP1]], <i8 16, i8 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 16)
+; CHECK-NEXT:    [[C:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 16)
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
   %B = add <2 x i8> %A, <i8 16, i8 16>
@@ -1681,7 +1681,7 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) {
 
 define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) {
 ; CHECK-LABEL: @flip_masked_bit_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 poison>
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
@@ -1692,7 +1692,7 @@ define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) {
 
 define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) {
 ; CHECK-LABEL: @flip_masked_bit_nonuniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 4>
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
@@ -2084,8 +2084,8 @@ define i16 @shl_ashr_pow2_const_case1(i16 %x) {
 
 define <3 x i16> @shl_lshr_pow2_const_case1_uniform_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_uniform_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 7, i16 7, i16 7>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 7)
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %shl = shl <3 x i16> <i16 4, i16 4, i16 4>, %x
@@ -2123,7 +2123,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec_negative(<3 x i16> %
 define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison1_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 8, i16 4, i16 4>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %shl = shl <3 x i16> <i16 poison, i16 16, i16 16>, %x
@@ -2135,7 +2135,7 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) {
 define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison2_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 poison, i16 4, i16 4>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 8), <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %shl = shl <3 x i16> <i16 16, i16 16, i16 16>, %x
@@ -2146,8 +2146,8 @@ define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) {
 
 define <3 x i16> @shl_lshr_pow2_const_case1_poison3_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison3_vec(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> <i16 16, i16 16, i16 16>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 5, i16 5>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> splat (i16 16), [[X:%.*]]
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], splat (i16 5)
 ; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 poison, i16 8, i16 8>
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
@@ -2380,8 +2380,8 @@ define i16 @lshr_shl_pow2_const_negative_oneuse(i16 %x) {
 
 define <3 x i16> @lshr_shl_pow2_const_case1_uniform_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @lshr_shl_pow2_const_case1_uniform_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 12, i16 12, i16 12>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 128, i16 128, i16 128>, <3 x i16> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], splat (i16 12)
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %lshr = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, %x
@@ -2419,7 +2419,7 @@ define <3 x i16> @lshr_shl_pow2_const_case1_non_uniform_vec_negative(<3 x i16> %
 define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison1_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 -1, i16 12, i16 12>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 128, i16 128, i16 128>, <3 x i16> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> splat (i16 128), <3 x i16> zeroinitializer
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %lshr = lshr <3 x i16> <i16 poison, i16 8192, i16 8192>, %x
@@ -2430,9 +2430,9 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) {
 
 define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison2_vec(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, [[X:%.*]]
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[LSHR]], <i16 poison, i16 6, i16 6>
-; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[SHL]], <i16 128, i16 128, i16 128>
+; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[SHL]], splat (i16 128)
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
   %lshr = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, %x
@@ -2443,8 +2443,8 @@ define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) {
 
 define <3 x i16> @lshr_shl_pow2_const_case1_poison3_vec(<3 x i16> %x) {
 ; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison3_vec(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, [[X:%.*]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[LSHR]], <i16 6, i16 6, i16 6>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <3 x i16> splat (i16 8192), [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl <3 x i16> [[LSHR]], splat (i16 6)
 ; CHECK-NEXT:    [[R:%.*]] = and <3 x i16> [[SHL]], <i16 poison, i16 128, i16 128>
 ; CHECK-NEXT:    ret <3 x i16> [[R]]
 ;
@@ -2564,7 +2564,7 @@ define i32 @and_zext_multiuse(i32 %a, i1 %b) {
 
 define <2 x i32> @and_zext_vec(<2 x i32> %a, <2 x i1> %b) {
 ; CHECK-LABEL: @and_zext_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i32> [[TMP1]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -2811,8 +2811,8 @@ define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) {
 
 define <2 x i16> @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(<2 x i16> %x) {
 ; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 24, i16 24>
-; CHECK-NEXT:    [[AND:%.*]] = xor <2 x i16> [[TMP1]], <i16 16, i16 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 24)
+; CHECK-NEXT:    [[AND:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 16)
 ; CHECK-NEXT:    ret <2 x i16> [[AND]]
 ;
   %add = add <2 x i16> %x, <i16 16, i16 16>
diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll
index 104486e7638f56..ce49f4e8f6f12e 100644
--- a/llvm/test/Transforms/InstCombine/and2.ll
+++ b/llvm/test/Transforms/InstCombine/and2.ll
@@ -83,8 +83,8 @@ define i1 @test8_logical(i32 %i) {
 
 define <2 x i1> @test8vec(<2 x i32> %i) {
 ; CHECK-LABEL: @test8vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 13, i32 13>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[I:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 13)
 ; CHECK-NEXT:    ret <2 x i1> [[COND]]
 ;
   %cmp1 = icmp ne <2 x i32> %i, zeroinitializer
@@ -107,7 +107,7 @@ define i64 @test9(i64 %x) {
 ; combine -x & 1 into x & 1
 define <2 x i64> @test9vec(<2 x i64> %x) {
 ; CHECK-LABEL: @test9vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[X:%.*]], <i64 1, i64 1>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 1)
 ; CHECK-NEXT:    ret <2 x i64> [[AND]]
 ;
   %sub = sub nsw <2 x i64> <i64 0, i64 0>, %x
@@ -207,7 +207,7 @@ define i8 @and1_lshr1_is_cmp_eq_0_multiuse(i8 %x) {
 
 define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec(
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> <i8 1, i8 1>, [[X:%.*]]
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> splat (i8 1), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sh = lshr <2 x i8> <i8 1, i8 1>, %x
diff --git a/llvm/test/Transforms/InstCombine/apint-select.ll b/llvm/test/Transforms/InstCombine/apint-select.ll
index c96c3dc3686935..6549f22c548fc5 100644
--- a/llvm/test/Transforms/InstCombine/apint-select.ll
+++ b/llvm/test/Transforms/InstCombine/apint-select.ll
@@ -85,7 +85,7 @@ define <2 x i64> @not_sext_vec(<2 x i1> %C) {
 
 define <2 x i32> @scalar_select_of_vectors(i1 %c) {
 ; CHECK-LABEL: @scalar_select_of_vectors(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %V = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/ashr-demand.ll b/llvm/test/Transforms/InstCombine/ashr-demand.ll
index a0e2af93b809bf..43593612320f64 100644
--- a/llvm/test/Transforms/InstCombine/ashr-demand.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-demand.ll
@@ -31,8 +31,8 @@ define i32 @srem8_ashr_mask(i32 %a0) {
 
 define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) {
 ; CHECK-LABEL: @srem2_ashr_mask_vector(
-; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SREM]], <i32 2, i32 2>
+; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MASK]]
 ;
   %srem = srem <2 x i32> %a0, <i32 2, i32 2>
@@ -43,8 +43,8 @@ define <2 x i32> @srem2_ashr_mask_vector(<2 x i32> %a0) {
 
 define <2 x i32> @srem2_ashr_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @srem2_ashr_mask_vector_nonconstant(
-; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SREM]], <i32 2, i32 2>
+; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SREM]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MASK]]
 ;
   %srem = srem <2 x i32> %a0, <i32 2, i32 2>
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index 1abf1be2cbedd9..60debf02b58621 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -419,7 +419,7 @@ define i32 @ashr_lshr_no_lshr(i32 %x, i32 %y) {
 
 define <2 x i32> @ashr_lshr_vec_wrong_pred(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @ashr_lshr_vec_wrong_pred(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[L:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X]], [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[L]], <2 x i32> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/avg-lsb.ll b/llvm/test/Transforms/InstCombine/avg-lsb.ll
index 1e9e4e3bcafb27..f356dd1cab11ef 100644
--- a/llvm/test/Transforms/InstCombine/avg-lsb.ll
+++ b/llvm/test/Transforms/InstCombine/avg-lsb.ll
@@ -34,7 +34,7 @@ define i8 @avg_lsb_mismatch(i8 %a, i8 %b) {
 define <2 x i8> @avg_lsb_vector(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: define <2 x i8> @avg_lsb_vector(
 ; CHECK-SAME: <2 x i8> [[A:%.*]], <2 x i8> [[B:%.*]]) {
-; CHECK-NEXT:    [[REM:%.*]] = and <2 x i8> [[A]], <i8 1, i8 1>
+; CHECK-NEXT:    [[REM:%.*]] = and <2 x i8> [[A]], splat (i8 1)
 ; CHECK-NEXT:    [[DIV2:%.*]] = and <2 x i8> [[B]], [[REM]]
 ; CHECK-NEXT:    ret <2 x i8> [[DIV2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
index 4b5de41fc7095d..75bdf66ab64284 100644
--- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll
@@ -336,7 +336,7 @@ define <2 x i8> @lshr_add_or_fail_dif_masks(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_good_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[TMP2]], <i8 18, i8 24>
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
@@ -350,7 +350,7 @@ define <2 x i8> @shl_or_or_good_mask(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i8> @shl_or_or_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @shl_or_or_fail_bad_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[BW1:%.*]] = or <2 x i8> [[TMP2]], <i8 19, i8 24>
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
@@ -394,7 +394,7 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_or_xor_good_mask(
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i8> [[Y:%.*]], <i8 -64, i8 64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i8> [[X:%.*]], [[TMP1]]
-; CHECK-NEXT:    [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], <i8 6, i8 6>
+; CHECK-NEXT:    [[BW1:%.*]] = lshr <2 x i8> [[TMP2]], splat (i8 6)
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
 ;
   %shift1 = lshr <2 x i8> %x, <i8 6, i8 6>
@@ -406,8 +406,8 @@ define <2 x i8> @lshr_or_xor_good_mask(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i8> @lshr_or_xor_fail_bad_mask(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_or_xor_fail_bad_mask(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 6, i8 6>
-; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 6)
+; CHECK-NEXT:    [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[BW2:%.*]] = or <2 x i8> [[SHIFT2]], <i8 7, i8 1>
 ; CHECK-NEXT:    [[BW1:%.*]] = xor <2 x i8> [[SHIFT1]], [[BW2]]
 ; CHECK-NEXT:    ret <2 x i8> [[BW1]]
@@ -633,7 +633,7 @@ define i8 @and_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) {
 
 define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @and_ashr_not_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
@@ -647,7 +647,7 @@ define <4 x i8> @and_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 
 define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @and_ashr_not_vec_commuted(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
@@ -661,7 +661,7 @@ define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s
 
 define <4 x i8> @and_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @and_ashr_not_vec_poison_1(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[AND]]
@@ -767,7 +767,7 @@ define i8 @or_ashr_not_fail_invalid_xor_constant(i8 %x, i8 %y, i8 %shamt) {
 
 define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @or_ashr_not_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[OR]]
@@ -781,7 +781,7 @@ define <4 x i8> @or_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 
 define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @or_ashr_not_vec_commuted(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[OR]]
@@ -795,7 +795,7 @@ define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh
 
 define <4 x i8> @or_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @or_ashr_not_vec_poison_1(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]]
 ; CHECK-NEXT:    ret <4 x i8> [[OR]]
@@ -902,7 +902,7 @@ define <4 x i8> @xor_ashr_not_vec(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) {
 ; CHECK-LABEL: @xor_ashr_not_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1)
 ; CHECK-NEXT:    ret <4 x i8> [[XOR]]
 ;
   %x.shift = ashr <4 x i8> %x, %shamt
@@ -916,7 +916,7 @@ define <4 x i8> @xor_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s
 ; CHECK-LABEL: @xor_ashr_not_vec_commuted(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1)
 ; CHECK-NEXT:    ret <4 x i8> [[XOR]]
 ;
   %x.shift = ashr <4 x i8> %x, %shamt
@@ -930,7 +930,7 @@ define <4 x i8> @xor_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s
 ; CHECK-LABEL: @xor_ashr_not_vec_poison_1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], splat (i8 -1)
 ; CHECK-NEXT:    ret <4 x i8> [[XOR]]
 ;
   %x.shift = ashr <4 x i8> %x, %shamt
diff --git a/llvm/test/Transforms/InstCombine/binop-cast.ll b/llvm/test/Transforms/InstCombine/binop-cast.ll
index 9d3b18c5e79ed5..7330d440982995 100644
--- a/llvm/test/Transforms/InstCombine/binop-cast.ll
+++ b/llvm/test/Transforms/InstCombine/binop-cast.ll
@@ -151,7 +151,7 @@ define i32 @or_sext_to_sel(i32 %x, i1 %y) {
 
 define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) {
 ; CHECK-LABEL: @or_sext_to_sel_constant_vec(
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 42, i32 -7>
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> <i32 42, i32 -7>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %sext = sext <2 x i1> %y to <2 x i32>
@@ -162,7 +162,7 @@ define <2 x i32> @or_sext_to_sel_constant_vec(<2 x i1> %y) {
 define <2 x i32> @or_sext_to_sel_swap(<2 x i32> %px, <2 x i1> %y) {
 ; CHECK-LABEL: @or_sext_to_sel_swap(
 ; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[PX:%.*]], [[PX]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[X]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[X]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %x = mul <2 x i32> %px, %px ; thwart complexity-based canonicalization
diff --git a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll
index a16ad4ddb806f6..e10f83f5eee0ba 100644
--- a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll
@@ -179,7 +179,7 @@ define i8 @shl_or_commuted(i8 %x) {
 define <2 x i8> @shl_or_splat(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x i8> @shl_or_splat
 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 22>, [[X]]
+; CHECK-NEXT:    [[BINOP:%.*]] = shl <2 x i8> splat (i8 22), [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[BINOP]]
 ;
   %shift = shl <2 x i8> <i8 16, i8 16>, %x
diff --git a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll
index b57b96db306ccc..a3cd0328e9b590 100644
--- a/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll
+++ b/llvm/test/Transforms/InstCombine/binop-select-cast-of-select-cond.ll
@@ -130,7 +130,7 @@ define i64 @select_zext_different_condition(i1 %c, i1 %d) {
 define <2 x i64> @vector_test(i1 %c) {
 ; CHECK-LABEL: define <2 x i64> @vector_test
 ; CHECK-SAME: (i1 [[C:%.*]]) {
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C]], <2 x i64> <i64 64, i64 64>, <2 x i64> <i64 1, i64 1>
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[C]], <2 x i64> splat (i64 64), <2 x i64> splat (i64 1)
 ; CHECK-NEXT:    [[EXT:%.*]] = zext i1 [[C]] to i64
 ; CHECK-NEXT:    [[VEC0:%.*]] = insertelement <2 x i64> poison, i64 [[EXT]], i64 0
 ; CHECK-NEXT:    [[VEC1:%.*]] = shufflevector <2 x i64> [[VEC0]], <2 x i64> poison, <2 x i32> zeroinitializer
@@ -215,8 +215,8 @@ define i6 @sub_select_zext_op_swapped_non_const_args(i1 %c, i6 %argT, i6 %argF)
 define <2 x i8> @vectorized_add(<2 x i1> %c, <2 x i8> %arg) {
 ; CHECK-LABEL: define <2 x i8> @vectorized_add
 ; CHECK-SAME: (<2 x i1> [[C:%.*]], <2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[ARG]], <i8 1, i8 1>
-; CHECK-NEXT:    [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[ARG]], splat (i8 1)
+; CHECK-NEXT:    [[ADD:%.*]] = select <2 x i1> [[C]], <2 x i8> [[TMP1]], <2 x i8> splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[ADD]]
 ;
   %zext = zext <2 x i1> %c to <2 x i8>
diff --git a/llvm/test/Transforms/InstCombine/binop-select.ll b/llvm/test/Transforms/InstCombine/binop-select.ll
index 6cd4132eadd77b..25f624ee134126 100644
--- a/llvm/test/Transforms/InstCombine/binop-select.ll
+++ b/llvm/test/Transforms/InstCombine/binop-select.ll
@@ -113,7 +113,7 @@ define i32 @test_sub_deduce_false(i32 %x, i32 %y) {
 define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @test_sub_dont_deduce_with_undef_cond_vec(
 ; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 9, i8 undef>
-; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> <i8 7, i8 7>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]])
 ; CHECK-NEXT:    ret <2 x i8> [[SUB]]
 ;
@@ -126,7 +126,7 @@ define <2 x i8> @test_sub_dont_deduce_with_undef_cond_vec(<2 x i8> %x, <2 x i8>
 define <2 x i8> @test_sub_dont_deduce_with_poison_cond_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @test_sub_dont_deduce_with_poison_cond_vec(
 ; CHECK-NEXT:    [[C_NOT:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 poison, i8 9>
-; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> <i8 7, i8 7>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[C_NOT]], <2 x i8> splat (i8 7), <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[X]], <2 x i8> [[COND]])
 ; CHECK-NEXT:    ret <2 x i8> [[SUB]]
 ;
@@ -369,7 +369,7 @@ define <2 x half> @fmul_sel_op1(i1 %b, <2 x half> %p) {
 define <2 x half> @fmul_sel_op1_use(i1 %b, <2 x half> %p) {
 ; CHECK-LABEL: @fmul_sel_op1_use(
 ; CHECK-NEXT:    [[X:%.*]] = fadd <2 x half> [[P:%.*]], <half 0xH3C00, half 0xH4000>
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> <half 0xHFFFF, half 0xHFFFF>
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[B:%.*]], <2 x half> zeroinitializer, <2 x half> splat (half 0xHFFFF)
 ; CHECK-NEXT:    call void @use_v2f16(<2 x half> [[S]])
 ; CHECK-NEXT:    [[R:%.*]] = fmul nnan nsz <2 x half> [[X]], [[S]]
 ; CHECK-NEXT:    ret <2 x half> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll
index 3e3426d951eb9e..97b93bdd23071e 100644
--- a/llvm/test/Transforms/InstCombine/bit-checks.ll
+++ b/llvm/test/Transforms/InstCombine/bit-checks.ll
@@ -290,8 +290,8 @@ define i32 @main4(i32 %argc) {
 
 define <2 x i32> @main4_splat(<2 x i32> %argc) {
 ; CHECK-LABEL: @main4_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], <i32 55, i32 55>
-; CHECK-NEXT:    [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 55, i32 55>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARGC:%.*]], splat (i32 55)
+; CHECK-NEXT:    [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 55)
 ; CHECK-NEXT:    [[STOREMERGE:%.*]] = zext <2 x i1> [[AND_COND]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[STOREMERGE]]
 ;
@@ -1371,7 +1371,7 @@ define <2 x i1> @no_masks_with_logical_or_vec_poison2(<2 x i32> %a, <2 x i32> %b
 ; CHECK-LABEL: @no_masks_with_logical_or_vec_poison2(
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne <2 x i32> [[B:%.*]], <i32 63, i32 poison>
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[C:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    [[OR2:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[CMP2]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll
index 79665be01576a7..0551a5cb5e2f27 100644
--- a/llvm/test/Transforms/InstCombine/bit_ceil.ll
+++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll
@@ -268,11 +268,11 @@ define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) {
 ; a vector version of @bit_ceil_32 above
 define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: @bit_ceil_v4i32(
-; CHECK-NEXT:    [[DEC:%.*]] = add <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[DEC:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false)
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    [[SEL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 31)
+; CHECK-NEXT:    [[SEL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i32> [[SEL]]
 ;
   %dec = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll
index bd8aabf4431c0a..2872221e8aa87c 100644
--- a/llvm/test/Transforms/InstCombine/bit_floor.ll
+++ b/llvm/test/Transforms/InstCombine/bit_floor.ll
@@ -151,10 +151,10 @@ define i32 @bit_floor_shl_used_twice(i32 %x, ptr %p) {
 define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: @bit_floor_v4i32(
 ; CHECK-NEXT:    [[EQ0:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X]], splat (i32 1)
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 1, 33) <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[LSHR]], i1 false)
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[CTLZ]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[SUB]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw <4 x i32> splat (i32 32), [[CTLZ]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <4 x i32> splat (i32 1), [[SUB]]
 ; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[EQ0]], <4 x i32> zeroinitializer, <4 x i32> [[SHL]]
 ; CHECK-NEXT:    ret <4 x i32> [[SEL]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll
index 3744d8c9171c7d..49e77009f3b1a0 100644
--- a/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast-inseltpoison.ll
@@ -75,7 +75,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
 
 define <2 x i64> @is_negative(<4 x i32> %x) {
 ; CHECK-LABEL: @is_negative(
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[NOTNOT]]
 ;
@@ -91,7 +91,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) {
 
 define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
 ; CHECK-LABEL: @is_negative_bonus_bitcast(
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[X_LOBIT]]
 ;
   %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
@@ -107,7 +107,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
 define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
 ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i8> [[B]]
 ;
   %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
diff --git a/llvm/test/Transforms/InstCombine/bitcast.ll b/llvm/test/Transforms/InstCombine/bitcast.ll
index 79e6370b7242f5..ce5a4635a9b0a2 100644
--- a/llvm/test/Transforms/InstCombine/bitcast.ll
+++ b/llvm/test/Transforms/InstCombine/bitcast.ll
@@ -77,7 +77,7 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
 
 define <2 x i64> @is_negative(<4 x i32> %x) {
 ; CHECK-LABEL: @is_negative(
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[NOTNOT:%.*]] = bitcast <4 x i32> [[X_LOBIT]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[NOTNOT]]
 ;
@@ -93,7 +93,7 @@ define <2 x i64> @is_negative(<4 x i32> %x) {
 
 define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
 ; CHECK-LABEL: @is_negative_bonus_bitcast(
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = ashr <4 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[X_LOBIT]]
 ;
   %lobit = ashr <4 x i32> %x, <i32 31, i32 31, i32 31, i32 31>
@@ -109,7 +109,7 @@ define <4 x i32> @is_negative_bonus_bitcast(<4 x i32> %x) {
 define <2 x i8> @canonicalize_bitcast_logic_with_constant(<4 x i4> %x) {
 ; CHECK-LABEL: @canonicalize_bitcast_logic_with_constant(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i4> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i8> [[B]]
 ;
   %a = and <4 x i4> %x, <i4 0, i4 8, i4 0, i4 8>
@@ -796,7 +796,7 @@ define double @copysign_idiom_f64(double %x, i64 %mag) {
 
 define <2 x float> @copysign_idiom_vec(<2 x float> %x) {
 ; CHECK-LABEL: @copysign_idiom_vec(
-; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[Y:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[Y]]
 ;
   %bits = bitcast <2 x float> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/bitreverse.ll b/llvm/test/Transforms/InstCombine/bitreverse.ll
index fe44a7a77bdff8..2d303efbe66c82 100644
--- a/llvm/test/Transforms/InstCombine/bitreverse.ll
+++ b/llvm/test/Transforms/InstCombine/bitreverse.ll
@@ -362,7 +362,7 @@ define i32 @rev_i1(i1 %x) {
 
 define <2 x i8> @rev_v2i1(<2 x i1> %x) {
 ; CHECK-LABEL: @rev_v2i1(
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> <i8 -128, i8 -128>, <2 x i8> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> splat (i8 -128), <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %z = zext <2 x i1> %x to <2 x i8>
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index ddc0430896e7d1..f7268ec9df0905 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -38,7 +38,7 @@ define i32 @lshr8_i32(i32 %x) {
 define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @lshr16_v2i32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %s = lshr <2 x i32> %x, <i32 16, i32 16>
@@ -222,7 +222,7 @@ define i16 @test7(i32 %A) {
 
 define <2 x i16> @test7_vector(<2 x i32> %A) {
 ; CHECK-LABEL: @test7_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[D:%.*]] = trunc nuw <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[D]]
 ;
@@ -246,7 +246,7 @@ define i16 @test8(i64 %A) {
 
 define <2 x i16> @test8_vector(<2 x i64> %A) {
 ; CHECK-LABEL: @test8_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 48, i64 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 48)
 ; CHECK-NEXT:    [[D:%.*]] = trunc nuw <2 x i64> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[D]]
 ;
@@ -447,7 +447,7 @@ define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 {
 
 define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_and32ivec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -1585053440)
 ; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
@@ -458,7 +458,7 @@ define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
 
 define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_or32ivec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 -1585053440)
 ; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
@@ -469,7 +469,7 @@ define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
 
 define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
 ; CHECK-LABEL: @bs_xor32ivec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1585053440, i32 -1585053440>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1585053440)
 ; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i32> [[T2]]
 ;
@@ -837,8 +837,8 @@ define i32 @bs_active_high7(i32 %0) {
 
 define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high4(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 60, i64 60>
@@ -849,7 +849,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], splat (i64 56)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 56, i64 57>
@@ -932,7 +932,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
 define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_byte_3h_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], splat (i32 8)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
@@ -966,7 +966,7 @@ define i16 @bs_active_low1(i16 %0) {
 
 define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low8(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], splat (i32 24)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = and <2 x i32> %0, <i32 255, i32 255>
@@ -977,7 +977,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], splat (i32 24)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 2, i32 128>
@@ -1060,7 +1060,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
 define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_byte_2l_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], splat (i64 40)
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll
index fa61bc841f0199..44e1616af86c54 100644
--- a/llvm/test/Transforms/InstCombine/bswap.ll
+++ b/llvm/test/Transforms/InstCombine/bswap.ll
@@ -110,10 +110,10 @@ define <2 x i32> @test2_vector(<2 x i32> %arg) {
 define <2 x i32> @test2_vector_poison(<2 x i32> %arg) {
 ; CHECK-LABEL: @test2_vector_poison(
 ; CHECK-NEXT:    [[T2:%.*]] = shl <2 x i32> [[ARG:%.*]], <i32 24, i32 poison>
-; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> [[ARG]], <i32 8, i32 8>
+; CHECK-NEXT:    [[T4:%.*]] = shl <2 x i32> [[ARG]], splat (i32 8)
 ; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], <i32 16711680, i32 poison>
 ; CHECK-NEXT:    [[T6:%.*]] = or disjoint <2 x i32> [[T2]], [[T5]]
-; CHECK-NEXT:    [[T8:%.*]] = lshr <2 x i32> [[ARG]], <i32 8, i32 8>
+; CHECK-NEXT:    [[T8:%.*]] = lshr <2 x i32> [[ARG]], splat (i32 8)
 ; CHECK-NEXT:    [[T9:%.*]] = and <2 x i32> [[T8]], <i32 65280, i32 poison>
 ; CHECK-NEXT:    [[T10:%.*]] = or disjoint <2 x i32> [[T6]], [[T9]]
 ; CHECK-NEXT:    [[T12:%.*]] = lshr <2 x i32> [[ARG]], <i32 24, i32 poison>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
index cde8efbafc5e5a..b7aed751a98977 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-ashr-shl-to-masking.ll
@@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_ashrexact(i8 %x) {
 
 define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @positive_samevar_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
@@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec(
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[X:%.*]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %tmp0 = ashr <2 x i8> %x, <i8 3, i8 3>
@@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %tmp0 = ashr <3 x i8> %x, <i8 3, i8 undef, i8 3>
@@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 
 define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerashr_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -8)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %tmp0 = ashr <2 x i8> %x, <i8 6, i8 6>
@@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerashr_vec(<2 x i8> %x) {
 define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerashr_vec_undef0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 6, i8 undef, i8 6>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 3)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %tmp0 = ashr <3 x i8> %x, <i8 6, i8 undef, i8 6>
@@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerashr_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_biggerashr_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerashr_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 6, i8 6, i8 6>
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerashr_vec_undef2(<3 x i8> %x) {
 
 define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %tmp0 = ashr <2 x i8> %x, <i8 3, i8 3>
@@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec_undef0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 6, i8 6, i8 6>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], splat (i8 6)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %tmp0 = ashr <3 x i8> %x, <i8 3, i8 undef, i8 3>
@@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <3 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 6, i8 undef, i8 6>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) {
 
 define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerashr_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], <i8 3, i8 6>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
@@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerashr_vec_nonsplat(<2 x i8> %x) {
 define <2 x i8> @positive_biggerLashr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerLashr_vec_nonsplat(
 ; CHECK-NEXT:    [[TMP0:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 3, i8 6>
-; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %tmp0 = ashr <2 x i8> %x, <i8 3, i8 6>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll
index b5ef1f466958d7..42c116f9d8f08c 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-negative-and-positive-thresholds.ll
@@ -450,8 +450,8 @@ define i32 @n19_oneuse9(i32 %x, i32 %replacement_low, i32 %replacement_high) {
 
 define <2 x i32> @t20_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) {
 ; CHECK-LABEL: @t20_ult_slt_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 -16, i32 -16>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 127, i32 127>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 -16)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 127)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
@@ -486,9 +486,9 @@ declare void @use2xi1(<2 x i1>)
 declare void @use(<2 x i1>)
 define <2 x i32> @t22_uge_slt(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) {
 ; CHECK-LABEL: @t22_uge_slt(
-; CHECK-NEXT:    [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[T0:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[T1:%.*]] = select <2 x i1> [[T0]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[REPLACEMENT_HIGH:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[X]], <i32 16, i32 16>
+; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[X]], splat (i32 16)
 ; CHECK-NEXT:    [[T3:%.*]] = icmp uge <2 x i32> [[T2]], <i32 144, i32 0>
 ; CHECK-NEXT:    call void @use2xi1(<2 x i1> [[T3]])
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T1]], <2 x i32> [[X]]
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll
index c555970ea43484..58f171e3889f66 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll
@@ -312,7 +312,7 @@ define i32 @n16_oneuse6(i32 %x, i32 %replacement_low, i32 %replacement_high) {
 define <2 x i32> @t17_ult_slt_vec_splat(<2 x i32> %x, <2 x i32> %replacement_low, <2 x i32> %replacement_high) {
 ; CHECK-LABEL: @t17_ult_slt_vec_splat(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i32> [[X]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[REPLACEMENT_LOW:%.*]], <2 x i32> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[REPLACEMENT_HIGH:%.*]], <2 x i32> [[TMP3]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
@@ -341,7 +341,7 @@ define <2 x i32> @t18_ult_slt_vec_nonsplat(<2 x i32> %x, <2 x i32> %replacement_
 define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) {
 ; CHECK-LABEL: @t19_ult_slt_vec_poison0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]]
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
@@ -355,7 +355,7 @@ define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_l
 define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) {
 ; CHECK-LABEL: @t20_ult_slt_vec_poison1(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]]
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
@@ -369,7 +369,7 @@ define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_l
 define <3 x i32> @t21_ult_slt_vec_poison2(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) {
 ; CHECK-LABEL: @t21_ult_slt_vec_poison2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]]
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
index 3d5696a0245139..8a7f1d259fe479 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
@@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
@@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) {
 
 define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) {
 ; CHECK-LABEL: @p3_vec_splat_poison(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
   %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
index 21daeb8983a85d..5f5a393641f205 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
@@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
index e103fe94409869..0a0656a2273cd0 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll
@@ -30,7 +30,7 @@ define i1 @p0(i8 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
@@ -60,7 +60,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) {
 
 define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) {
 ; CHECK-LABEL: @p3_vec_splat_poison(
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
   %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
index bbd733e86a32de..14d634c493780f 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll
@@ -39,7 +39,7 @@ define i1 @p0() {
 define <2 x i1> @p1_vec_splat() {
 ; CHECK-LABEL: @p1_vec_splat(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
index b167c8ad25aa94..c563c7c72b9c5c 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll
@@ -39,7 +39,7 @@ define i1 @p0() {
 define <2 x i1> @p1_vec_splat() {
 ; CHECK-LABEL: @p1_vec_splat(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
@@ -75,7 +75,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase() {
 define <3 x i1> @p3_vec_splat_poison() {
 ; CHECK-LABEL: @p3_vec_splat_poison(
 ; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp slt <3 x i8> [[X]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp slt <3 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
   %x = call <3 x i8> @gen3x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
index 82815024477328..a5aa887a0cb6ac 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll
@@ -30,7 +30,7 @@ define i1 @p0(i8 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
index 1dac73df387896..c6a69688d0287e 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll
@@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
@@ -81,7 +81,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) {
 
 define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) {
 ; CHECK-LABEL: @p3_vec_splat_poison(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
   %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
index e8781407cb647f..1a1a1d08741bed 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll
@@ -51,7 +51,7 @@ define i1 @pv(i8 %y) {
 define <2 x i1> @p1_vec_splat() {
 ; CHECK-LABEL: @p1_vec_splat(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
index 54f00321c4cf02..fe9f26f26a1877 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll
@@ -51,7 +51,7 @@ define i1 @pv(i8 %y) {
 define <2 x i1> @p1_vec_splat() {
 ; CHECK-LABEL: @p1_vec_splat(
 ; CHECK-NEXT:    [[X:%.*]] = call <2 x i8> @gen2x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <2 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %x = call <2 x i8> @gen2x8()
@@ -98,7 +98,7 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1() {
 define <3 x i1> @p3_vec_splat_poison() {
 ; CHECK-LABEL: @p3_vec_splat_poison(
 ; CHECK-NEXT:    [[X:%.*]] = call <3 x i8> @gen3x8()
-; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X]], <i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ult <3 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
   %x = call <3 x i8> @gen3x8()
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
index 7eda7bb58f2700..9a0f073071b17b 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll
@@ -42,7 +42,7 @@ define i1 @pv(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
   %tmp0 = and <2 x i8> %x, <i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll
index 297dc647e7c67f..a85d7932f9b7ef 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-fcmp-inf.ll
@@ -180,7 +180,7 @@ define i1 @oge_pinf_fmf(half %x) {
 define <2 x i1> @olt_pinf_vec(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x i1> @olt_pinf_vec(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp one <2 x half> [[X]], <half 0xH7C00, half 0xH7C00>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp one <2 x half> [[X]], splat (half 0xH7C00)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = fcmp olt <2 x half> %x, <half 0xH7c00, half 0xH7c00>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
index b706ff15f569cb..7bc72b8a6c2d9b 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lack-of-signed-truncation-check.ll
@@ -44,8 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 8, i8 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 8)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @p3_vec_undef0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
 ;
@@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
 
 define <3 x i1> @p4_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @p4_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <3 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
@@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
index 5a58fc96c6643f..67096e19e9a02f 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll
@@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP0]]
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
index edd528b500e557..6f7b5c2a42c2e9 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll
@@ -30,7 +30,7 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP0]]
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
index fd56324f10dc38..8e1e5a56a59d02 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-eq-to-icmp-ule.ll
@@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp eq <2 x i8> [[X_HIGHBITS]], zeroinitializer
@@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 
 define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @p3_vec_poison0(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
 ; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
index 4d8ce5d9a6cca0..8d6dfecffcd14a 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v3-and-icmp-ne-to-icmp-ugt.ll
@@ -42,7 +42,7 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <2 x i8> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i8> [[X_HIGHBITS]], zeroinitializer
@@ -74,7 +74,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 
 define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @p3_vec_poison0(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> <i8 1, i8 1, i8 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
 ; CHECK-NEXT:    [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
index 5fab93092a050e..12aa83636223e2 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll
@@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
@@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @p2_vec_poison0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ule <3 x i8> [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
index 40a67ce1d60cb4..4e840a6fd1b640 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll
@@ -40,9 +40,9 @@ define i1 @p0(i8 %x, i8 %y) {
 
 define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[Y]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i8> splat (i8 -1), [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
@@ -58,7 +58,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @p2_vec_poison0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i8> splat (i8 -1), [[Y]]
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <3 x i1> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
index 719fdd036a01b6..2806bf69a24f2c 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
@@ -371,7 +371,7 @@ define i8 @positive_biggershl_shlnuwnsw_lshrexact(i8 %x) {
 
 define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @positive_samevar_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[Y:%.*]]
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
@@ -386,7 +386,7 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec(
-; CHECK-NEXT:    [[T0:%.*]] = and <2 x i8> [[X:%.*]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[T0:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -8)
 ; CHECK-NEXT:    ret <2 x i8> [[T0]]
 ;
   %t0 = lshr <2 x i8> %x, <i8 3, i8 3>
@@ -397,7 +397,7 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
@@ -407,7 +407,7 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -428,8 +428,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 
 define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerlshr_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 24, i8 24>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 24)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %t0 = lshr <2 x i8> %x, <i8 6, i8 6>
@@ -440,7 +440,7 @@ define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) {
 define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerlshr_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 6, i8 undef, i8 6>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 3)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %t0 = lshr <3 x i8> %x, <i8 6, i8 undef, i8 6>
@@ -450,7 +450,7 @@ define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_biggerlshr_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerlshr_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 6, i8 6, i8 6>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -472,8 +472,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef2(<3 x i8> %x) {
 
 define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], splat (i8 -64)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %t0 = lshr <2 x i8> %x, <i8 3, i8 3>
@@ -484,7 +484,7 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 3>
-; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 6, i8 6, i8 6>
+; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], splat (i8 6)
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
   %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
@@ -494,7 +494,7 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 
 define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @positive_biggershl_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[T0]], <i8 6, i8 undef, i8 6>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
@@ -563,7 +563,7 @@ define i8 @positive_biggershl_multiuse(i8 %x) {
 
 define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerlshr_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[T0]], <i8 3, i8 6>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
@@ -575,7 +575,7 @@ define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) {
 define <2 x i8> @positive_biggerLlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @positive_biggerLlshr_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 6>
-; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[T0]], <i8 3, i8 3>
+; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[T0]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %t0 = lshr <2 x i8> %x, <i8 3, i8 6>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index c1e871dcccddcf..400d911929b996 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -124,7 +124,7 @@ define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) {
 
 define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @positive_samevar_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> <i32 -1, i32 -1>, [[Y:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> splat (i32 -1), [[Y:%.*]]
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -139,7 +139,7 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec(
-; CHECK-NEXT:    [[T0:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
+; CHECK-NEXT:    [[T0:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727)
 ; CHECK-NEXT:    ret <2 x i32> [[T0]]
 ;
   %t0 = shl <2 x i32> %x, <i32 5, i32 5>
@@ -150,7 +150,7 @@ define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
 define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
@@ -160,7 +160,7 @@ define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
 
 define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 undef, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
@@ -181,8 +181,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
 
 define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 134217696, i32 134217696>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 134217696)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %t0 = shl <2 x i32> %x, <i32 10, i32 10>
@@ -193,7 +193,7 @@ define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) {
 define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 5)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %t0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
@@ -203,7 +203,7 @@ define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) {
 
 define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 10)
 ; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 undef, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
@@ -225,8 +225,8 @@ define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) {
 
 define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 5, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 4194303, i32 4194303>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 5)
+; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], splat (i32 4194303)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %t0 = shl <2 x i32> %x, <i32 5, i32 5>
@@ -237,7 +237,7 @@ define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) {
 define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], splat (i32 10)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
@@ -247,7 +247,7 @@ define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) {
 
 define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef1(
-; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 10, i32 undef, i32 10>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
@@ -346,7 +346,7 @@ define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 
 define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], <i32 5, i32 10>
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -358,7 +358,7 @@ define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) {
 define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %t0 = shl <2 x i32> %x, <i32 5, i32 10>
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
index b9875fb9780fa9..5a12ffede78a57 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-signed-truncation-check.ll
@@ -44,8 +44,8 @@ define i1 @pb(i65 %x) {
 
 define <2 x i1> @p1_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @p1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 -4, i8 -4>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -4)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 -8)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
   %tmp0 = shl <2 x i8> %x, <i8 5, i8 5>
@@ -70,7 +70,7 @@ define <2 x i1> @p2_vec_nonsplat(<2 x i8> %x) {
 define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
 ; CHECK-LABEL: @p3_vec_undef0(
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 undef, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
 ;
@@ -82,7 +82,7 @@ define <3 x i1> @p3_vec_undef0(<3 x i8> %x) {
 
 define <3 x i1> @p4_vec_undef1(<3 x i8> %x) {
 ; CHECK-LABEL: @p4_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], <i8 5, i8 5, i8 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <3 x i8> [[TMP0]], <i8 5, i8 undef, i8 5>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <3 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <3 x i1> [[TMP2]]
@@ -222,7 +222,7 @@ define i1 @n2(i8 %x, i8 %y) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr exact <2 x i8> [[TMP0]], <i8 5, i8 3>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i8> [[TMP1]], [[X]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
diff --git a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
index 68a386ec004237..dfefa11b70a020 100644
--- a/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
+++ b/llvm/test/Transforms/InstCombine/cast-int-fcmp-eq-0.ll
@@ -533,7 +533,7 @@ define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_n0_sitofp(<2 x i32> %i) {
 define <2 x i1> @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp(<2 x i32> %i) {
 ; CHECK-LABEL: @i32_vec_cast_cmp_oeq_vec_int_i32imax_sitofp(
 ; CHECK-NEXT:    [[F:%.*]] = sitofp <2 x i32> [[I:%.*]] to <2 x float>
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], <float 0x41E0000000000000, float 0x41E0000000000000>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[F]], splat (float 0x41E0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %f = sitofp <2 x i32> %i to <2 x float>
diff --git a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll
index 7b6d07a14a30e6..e9f6feba24cf67 100644
--- a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll
+++ b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll
@@ -631,7 +631,7 @@ define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_poison(<3 x i32> %i) {
 
 define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) {
 ; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], <i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], splat (i64 1)
 ; CHECK-NEXT:    ret <3 x i1> [[CMP]]
 ;
   %f = sitofp <3 x i64> %i to  <3 x half>
@@ -642,7 +642,7 @@ define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) {
 
 define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison(<3 x i16> %i) {
 ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], <i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], splat (i16 -1)
 ; CHECK-NEXT:    ret <3 x i1> [[CMP]]
 ;
   %f = sitofp <3 x i16> %i to  <3 x float>
@@ -659,7 +659,7 @@ define <6 x i1> @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(<3 x
 ; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_bitcast_vector_num_elements_sitofp(
 ; CHECK-NEXT:    [[F:%.*]] = sitofp <3 x i16> [[I:%.*]] to <3 x float>
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <3 x float> [[F]] to <6 x i16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <6 x i16> [[B]], splat (i16 -1)
 ; CHECK-NEXT:    ret <6 x i1> [[CMP]]
 ;
   %f = sitofp <3 x i16> %i to  <3 x float>
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
index 0e44dc1b8ca9c0..5e67eabbe0d302 100644
--- a/llvm/test/Transforms/InstCombine/cast.ll
+++ b/llvm/test/Transforms/InstCombine/cast.ll
@@ -347,8 +347,8 @@ define i1 @test31(i64 %A) {
 define <2 x i1> @test31vec(<2 x i64> %A) {
 ; ALL-LABEL: @test31vec(
 ; ALL-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = and <2 x i32> [[B]], <i32 42, i32 42>
-; ALL-NEXT:    [[D:%.*]] = icmp eq <2 x i32> [[C]], <i32 10, i32 10>
+; ALL-NEXT:    [[C:%.*]] = and <2 x i32> [[B]], splat (i32 42)
+; ALL-NEXT:    [[D:%.*]] = icmp eq <2 x i32> [[C]], splat (i32 10)
 ; ALL-NEXT:    ret <2 x i1> [[D]]
 ;
   %B = trunc <2 x i64> %A to <2 x i32>
@@ -362,8 +362,8 @@ define <2 x i1> @test31vec(<2 x i64> %A) {
 
 define <2 x i1> @test32vec(<2 x i8> %A) {
 ; ALL-LABEL: @test32vec(
-; ALL-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; ALL-NEXT:    [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 10, i8 10>
+; ALL-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 42)
+; ALL-NEXT:    [[D:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 10)
 ; ALL-NEXT:    ret <2 x i1> [[D]]
 ;
   %B = zext <2 x i8> %A to <2 x i16>
@@ -417,7 +417,7 @@ define i1 @test36(i32 %a) {
 
 define <2 x i1> @test36vec(<2 x i32> %a) {
 ; ALL-LABEL: @test36vec(
-; ALL-NEXT:    [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; ALL-NEXT:    [[D:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1)
 ; ALL-NEXT:    ret <2 x i1> [[D]]
 ;
   %b = lshr <2 x i32> %a, <i32 31, i32 31>
@@ -480,8 +480,8 @@ define i16 @test40(i16 %a) {
 
 define <2 x i16> @test40vec(<2 x i16> %a) {
 ; ALL-LABEL: @test40vec(
-; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 9, i16 9>
-; ALL-NEXT:    [[T5:%.*]] = shl <2 x i16> [[A]], <i16 8, i16 8>
+; ALL-NEXT:    [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 9)
+; ALL-NEXT:    [[T5:%.*]] = shl <2 x i16> [[A]], splat (i16 8)
 ; ALL-NEXT:    [[T32:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]]
 ; ALL-NEXT:    ret <2 x i16> [[T32]]
 ;
@@ -615,8 +615,8 @@ define i64 @test46(i64 %A) {
 define <2 x i64> @test46vec(<2 x i64> %A) {
 ; ALL-LABEL: @test46vec(
 ; ALL-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = shl <2 x i32> [[B]], <i32 8, i32 8>
-; ALL-NEXT:    [[D:%.*]] = and <2 x i32> [[C]], <i32 10752, i32 10752>
+; ALL-NEXT:    [[C:%.*]] = shl <2 x i32> [[B]], splat (i32 8)
+; ALL-NEXT:    [[D:%.*]] = and <2 x i32> [[C]], splat (i32 10752)
 ; ALL-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[D]] to <2 x i64>
 ; ALL-NEXT:    ret <2 x i64> [[E]]
 ;
@@ -773,7 +773,7 @@ define i64 @test56(i16 %A) {
 define <2 x i64> @test56vec(<2 x i16> %A) {
 ; ALL-LABEL: @test56vec(
 ; ALL-NEXT:    [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[P354:%.*]] = lshr <2 x i32> [[P353]], <i32 5, i32 5>
+; ALL-NEXT:    [[P354:%.*]] = lshr <2 x i32> [[P353]], splat (i32 5)
 ; ALL-NEXT:    [[P355:%.*]] = zext nneg <2 x i32> [[P354]] to <2 x i64>
 ; ALL-NEXT:    ret <2 x i64> [[P355]]
 ;
@@ -798,7 +798,7 @@ define i64 @test57(i64 %A) {
 define <2 x i64> @test57vec(<2 x i64> %A) {
 ; ALL-LABEL: @test57vec(
 ; ALL-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 8, i32 8>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8)
 ; ALL-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[C]] to <2 x i64>
 ; ALL-NEXT:    ret <2 x i64> [[E]]
 ;
@@ -1506,7 +1506,7 @@ define i8 @trunc_lshr_sext_exact(i8 %A) {
 
 define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_uniform(
-; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A:%.*]], <i8 6, i8 6>
+; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 6)
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
   %B = sext <2 x i8> %A to <2 x i32>
@@ -1552,7 +1552,7 @@ define <2 x i8> @trunc_lshr_sext_uses1(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_uses1(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], <i8 6, i8 6>
+; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6)
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
   %B = sext <2 x i8> %A to <2 x i32>
@@ -1581,9 +1581,9 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_uses3(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 6, i32 6>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
-; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], <i8 6, i8 6>
+; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 6)
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
   %B = sext <2 x i8 >%A to <2 x i32>
@@ -1596,7 +1596,7 @@ define <2 x i8> @trunc_lshr_sext_uses3(<2 x i8> %A) {
 
 define <2 x i8> @trunc_lshr_overshift_sext(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_sext(
-; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A:%.*]], <i8 7, i8 7>
+; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7)
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
   %B = sext <2 x i8> %A to <2 x i32>
@@ -1622,9 +1622,9 @@ define i8 @trunc_lshr_overshift_sext_uses1(i8 %A) {
 define <2 x i8> @trunc_lshr_overshift_sext_uses2(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_sext_uses2(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 8, i32 8>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
-; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], <i8 7, i8 7>
+; ALL-NEXT:    [[D:%.*]] = ashr <2 x i8> [[A]], splat (i8 7)
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
   %B = sext <2 x i8> %A to <2 x i32>
@@ -1679,7 +1679,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses1(<2 x i16> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses1(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A]], <i16 9, i16 9>
+; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A]], splat (i16 9)
 ; ALL-NEXT:    [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8>
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -1709,7 +1709,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_wide_input_uses3(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 9, i32 9>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 9)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
 ; ALL-NEXT:    [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i8>
 ; ALL-NEXT:    ret <2 x i8> [[D]]
@@ -1724,7 +1724,7 @@ define <2 x i8> @trunc_lshr_sext_wide_input_uses3(<2 x i16> %A) {
 
 define <2 x i8> @trunc_lshr_overshift_wide_input_sext(<2 x i16> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_wide_input_sext(
-; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], <i16 15, i16 15>
+; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15)
 ; ALL-NEXT:    [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8>
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -1751,7 +1751,7 @@ define i8 @trunc_lshr_overshift_sext_wide_input_uses1(i16 %A) {
 
 define <2 x i8> @trunc_lshr_overshift_sext_wide_input_uses2(<2 x i16> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_sext_wide_input_uses2(
-; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], <i16 15, i16 15>
+; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i16> [[A:%.*]], splat (i16 15)
 ; ALL-NEXT:    [[C:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
 ; ALL-NEXT:    [[D:%.*]] = trunc nsw <2 x i16> [[TMP1]] to <2 x i8>
@@ -1797,7 +1797,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses1(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses1(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[A]], <i8 6, i8 6>
+; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[A]], splat (i8 6)
 ; ALL-NEXT:    [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; ALL-NEXT:    ret <2 x i16> [[D]]
 ;
@@ -1827,7 +1827,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_sext_narrow_input_uses3(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 6, i32 6>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 6)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
 ; ALL-NEXT:    [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16>
 ; ALL-NEXT:    ret <2 x i16> [[D]]
@@ -1842,7 +1842,7 @@ define <2 x i16> @trunc_lshr_sext_narrow_input_uses3(<2 x i8> %A) {
 
 define <2 x i16> @trunc_lshr_overshift_narrow_input_sext(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_narrow_input_sext(
-; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], <i8 7, i8 7>
+; ALL-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[A:%.*]], splat (i8 7)
 ; ALL-NEXT:    [[D:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; ALL-NEXT:    ret <2 x i16> [[D]]
 ;
@@ -1870,7 +1870,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses1(i8 %A) {
 define <2 x i16> @trunc_lshr_overshift_sext_narrow_input_uses2(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift_sext_narrow_input_uses2(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 8, i32 8>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 8)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
 ; ALL-NEXT:    [[D:%.*]] = trunc <2 x i32> [[C]] to <2 x i16>
 ; ALL-NEXT:    ret <2 x i16> [[D]]
@@ -1902,7 +1902,7 @@ define i16 @trunc_lshr_overshift_sext_narrow_input_uses3(i8 %A) {
 define <2 x i8> @trunc_lshr_overshift2_sext(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift2_sext(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 25, i32 25>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25)
 ; ALL-NEXT:    [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8>
 ; ALL-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -1930,7 +1930,7 @@ define i8 @trunc_lshr_overshift2_sext_uses1(i8 %A) {
 define <2 x i8> @trunc_lshr_overshift2_sext_uses2(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_overshift2_sext_uses2(
 ; ALL-NEXT:    [[B:%.*]] = sext <2 x i8> [[A:%.*]] to <2 x i32>
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], <i32 25, i32 25>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i32> [[B]], splat (i32 25)
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[C]])
 ; ALL-NEXT:    [[D:%.*]] = trunc nuw nsw <2 x i32> [[C]] to <2 x i8>
 ; ALL-NEXT:    ret <2 x i8> [[D]]
@@ -1983,7 +1983,7 @@ define i8 @trunc_lshr_zext_exact(i8 %A) {
 
 define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_zext_uniform(
-; ALL-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 6>
+; ALL-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[A:%.*]], splat (i8 6)
 ; ALL-NEXT:    ret <2 x i8> [[TMP1]]
 ;
   %B = zext <2 x i8> %A to <2 x i32>
@@ -2029,7 +2029,7 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) {
 ; ALL-LABEL: @trunc_lshr_zext_uses1(
 ; ALL-NEXT:    [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32>
 ; ALL-NEXT:    call void @use_v2i32(<2 x i32> [[B]])
-; ALL-NEXT:    [[C:%.*]] = lshr <2 x i8> [[A]], <i8 6, i8 6>
+; ALL-NEXT:    [[C:%.*]] = lshr <2 x i8> [[A]], splat (i8 6)
 ; ALL-NEXT:    ret <2 x i8> [[C]]
 ;
   %B = zext <2 x i8> %A to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
index 1dd0b17e9f46dd..97c84f40714682 100644
--- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll
@@ -570,8 +570,8 @@ define i32 @mixed_clamp_to_i32_2(float %x) {
 
 define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @mixed_clamp_to_float_vec(
-; CHECK-NEXT:    [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 255>)
-; CHECK-NEXT:    [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT:    [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255))
+; CHECK-NEXT:    [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> splat (i32 1))
 ; CHECK-NEXT:    [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float>
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll
index 9a9f359fa80b4a..4505d91bfb7a7a 100644
--- a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll
@@ -43,7 +43,7 @@ define i1 @bswap_ne_i32(i32 %x) {
 
 define <2 x i1> @bswap_eq_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: @bswap_eq_v2i64(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 216172782113783808, i64 216172782113783808>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 216172782113783808)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %bs = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %x)
@@ -73,7 +73,7 @@ define i1 @ctlz_eq_zero_i32(i32 %x) {
 
 define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @ctlz_ne_zero_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
@@ -93,7 +93,7 @@ define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) {
 
 define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
@@ -114,8 +114,8 @@ define i1 @ctlz_eq_other_i32(i32 %x) {
 
 define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @ctlz_ne_other_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -128, i32 -128>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 128, i32 128>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -128)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 128)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
@@ -211,7 +211,7 @@ define <2 x i1> @ctlz_ult_one_v2i32(<2 x i32> %x) {
 
 define <2 x i1> @ctlz_ult_other_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @ctlz_ult_other_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -223,7 +223,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) {
 ; CHECK-LABEL: @ctlz_ult_other_multiuse_v2i32(
 ; CHECK-NEXT:    [[LZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
 ; CHECK-NEXT:    store <2 x i32> [[LZ]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X]], splat (i32 65535)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -234,7 +234,7 @@ define <2 x i1> @ctlz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) {
 
 define <2 x i1> @ctlz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @ctlz_ult_bw_minus_one_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %lz = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 false)
@@ -285,7 +285,7 @@ define i1 @cttz_eq_zero_i33(i33 %x) {
 
 define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @cttz_ne_zero_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -306,7 +306,7 @@ define i1 @cttz_eq_bw_minus_1_i33(i33 %x) {
 
 define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
@@ -327,8 +327,8 @@ define i1 @cttz_eq_other_i33(i33 %x) {
 
 define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) {
 ; CHECK-LABEL: @cttz_ne_other_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 31)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false)
@@ -417,7 +417,7 @@ define <2 x i1> @cttz_ult_one_v2i32(<2 x i32> %x) {
 
 define <2 x i1> @cttz_ult_other_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @cttz_ult_other_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -430,7 +430,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) {
 ; CHECK-LABEL: @cttz_ult_other_multiuse_v2i32(
 ; CHECK-NEXT:    [[TZ:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
 ; CHECK-NEXT:    store <2 x i32> [[TZ]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], <i32 16, i32 16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TZ]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %tz = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 false)
@@ -441,7 +441,7 @@ define <2 x i1> @cttz_ult_other_multiuse_v2i32(<2 x i32> %x, ptr %p) {
 
 define <2 x i1> @cttz_ult_bw_minus_one_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @cttz_ult_bw_minus_one_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -492,7 +492,7 @@ define i1 @ctpop_eq_bitwidth_i8(i8 %x) {
 
 define <2 x i1> @ctpop_ne_bitwidth_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_ne_bitwidth_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
@@ -515,7 +515,7 @@ define i1 @ctpop_ugt_bitwidth_minus_one_i8(i8 %x, ptr %p) {
 
 define <2 x i1> @ctpop_ult_bitwidth_v2i32(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_ult_bitwidth_v2i32(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %pop = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %x)
@@ -792,7 +792,7 @@ define i1 @bitreverse_ult_22_fail_not_equality_pred(i8 %x) {
 
 define <2 x i1> @bitreverse_vec_eq_2_2(<2 x i8> %x) {
 ; CHECK-LABEL: @bitreverse_vec_eq_2_2(
-; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 64, i8 64>
+; CHECK-NEXT:    [[Z:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 64)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = call <2 x i8> @llvm.bitreverse.v2i8(<2 x i8> %x)
diff --git a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
index ed8bfd5d669d9e..dcd79f58390023 100644
--- a/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll
@@ -203,7 +203,7 @@ define i1 @class_inf_or_fcmp_issubnormal(half %x) {
 define <2 x i1> @class_finite_or_fcmp_issubnormal_vector(<2 x half> %x) {
 ; CHECK-LABEL: @class_finite_or_fcmp_issubnormal_vector(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT:    [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], <half 0xH7C00, half 0xH7C00>
+; CHECK-NEXT:    [[OR:%.*]] = fcmp one <2 x half> [[TMP1]], splat (half 0xH7C00)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
diff --git a/llvm/test/Transforms/InstCombine/compare-signs.ll b/llvm/test/Transforms/InstCombine/compare-signs.ll
index ede652869212c1..e5de4bb0cc4be9 100644
--- a/llvm/test/Transforms/InstCombine/compare-signs.ll
+++ b/llvm/test/Transforms/InstCombine/compare-signs.ll
@@ -50,7 +50,7 @@ define i32 @test3(i32 %a, i32 %b) nounwind readnone {
 define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec(
 ; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[T2:%.*]] = icmp sgt <2 x i32> [[T2_UNSHIFTED]], splat (i32 -1)
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
@@ -64,7 +64,7 @@ define <2 x i32> @test3vec(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec_poison1(
 ; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], <i32 16777216, i32 16777216>
+; CHECK-NEXT:    [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 16777216)
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
@@ -78,7 +78,7 @@ define <2 x i32> @test3vec_poison1(<2 x i32> %a, <2 x i32> %b) nounwind readnone
 define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec_poison2(
 ; CHECK-NEXT:    [[T2_UNSHIFTED:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], <i32 131072, i32 131072>
+; CHECK-NEXT:    [[T2:%.*]] = icmp ult <2 x i32> [[T2_UNSHIFTED]], splat (i32 131072)
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
@@ -93,8 +93,8 @@ define <2 x i32> @test3vec_poison2(<2 x i32> %a, <2 x i32> %b) nounwind readnone
 
 define <2 x i32> @test3vec_diff(<2 x i32> %a, <2 x i32> %b) nounwind readnone {
 ; CHECK-LABEL: @test3vec_diff(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31)
+; CHECK-NEXT:    [[T1:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 30)
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T0]], [[T1]]
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[T2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
@@ -154,7 +154,7 @@ define i1 @test4a(i32 %a) {
 
 define <2 x i1> @test4a_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test4a_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %l = ashr <2 x i32> %a, <i32 31, i32 31>
@@ -194,7 +194,7 @@ define i1 @test4c(i64 %a) {
 
 define <2 x i1> @test4c_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test4c_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], <i64 1, i64 1>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i64> [[A:%.*]], splat (i64 1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %l = ashr <2 x i64> %a, <i64 63, i64 63>
diff --git a/llvm/test/Transforms/InstCombine/compare-udiv.ll b/llvm/test/Transforms/InstCombine/compare-udiv.ll
index 20e719cafa6abd..f569025a91a01a 100644
--- a/llvm/test/Transforms/InstCombine/compare-udiv.ll
+++ b/llvm/test/Transforms/InstCombine/compare-udiv.ll
@@ -111,7 +111,7 @@ define i1 @test6(i32 %d) {
 
 define <2 x i1> @test6vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test6vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], <i32 6, i32 6>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 6)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 5, i32 5>, %d
@@ -150,7 +150,7 @@ define i1 @test8(i32 %d) {
 
 define <2 x i1> @test8vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test8vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -170,7 +170,7 @@ define i1 @test9(i32 %d) {
 
 define <2 x i1> @test9vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test9vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -190,7 +190,7 @@ define i1 @test10(i32 %d) {
 
 define <2 x i1> @test10vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test10vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[D:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -210,7 +210,7 @@ define i1 @test11(i32 %d) {
 
 define <2 x i1> @test11vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test11vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -230,7 +230,7 @@ define i1 @test12(i32 %d) {
 
 define <2 x i1> @test12vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test12vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -250,7 +250,7 @@ define i1 @test13(i32 %d) {
 
 define <2 x i1> @test13vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test13vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
@@ -270,7 +270,7 @@ define i1 @test14(i32 %d) {
 
 define <2 x i1> @test14vec(<2 x i32> %d) {
 ; CHECK-LABEL: @test14vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[D:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %div = udiv <2 x i32> <i32 4, i32 4>, %d
diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index 22a7c0c072d963..3e5468d2f8bed5 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -62,7 +62,7 @@ define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0
 define <2 x ptr> @fold_2x_vec_i64(<2 x ptr> %p, <2 x i64> %m0) {
 ; CHECK-LABEL: define <2 x ptr> @fold_2x_vec_i64
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M0]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M0]], splat (i64 -2)
 ; CHECK-NEXT:    [[P1:%.*]] = call align 2 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x ptr> [[P1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
index a63eeab6a4b1e9..ce3355b6df039e 100644
--- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
+++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
@@ -333,7 +333,7 @@ entry:
 define <4 x half> @copysign_splat(<4 x half> %a) {
 ; CHECK-LABEL: @copysign_splat(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, <4 x half> [[A:%.*]])
+; CHECK-NEXT:    [[RET:%.*]] = call <4 x half> @llvm.copysign.v4f16(<4 x half> splat (half 0xH3C00), <4 x half> [[A:%.*]])
 ; CHECK-NEXT:    ret <4 x half> [[RET]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
index 12c608cd6c2bb9..74a1e318d77ede 100644
--- a/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/create-class-from-logic-fcmp.ll
@@ -1779,7 +1779,7 @@ define i1 @not_isfinite_or_zero_f16_daz(half %x) #1 {
 define <2 x i1> @not_isfinite_or_zero_v2f16_daz(<2 x half> %x) #1 {
 ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_daz(
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT:    [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
+; CHECK-NEXT:    [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00)
 ; CHECK-NEXT:    [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer
 ; CHECK-NEXT:    [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]]
 ; CHECK-NEXT:    ret <2 x i1> [[CLASS]]
@@ -1810,7 +1810,7 @@ define i1 @not_isfinite_or_zero_f16_dynamic(half %x) #2 {
 define <2 x i1> @not_isfinite_or_zero_v2f16_dynamic(<2 x half> %x) #2 {
 ; CHECK-LABEL: @not_isfinite_or_zero_v2f16_dynamic(
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X:%.*]])
-; CHECK-NEXT:    [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], <half 0xH7C00, half 0xH7C00>
+; CHECK-NEXT:    [[CMPINF:%.*]] = fcmp ueq <2 x half> [[FABS]], splat (half 0xH7C00)
 ; CHECK-NEXT:    [[CMPZERO:%.*]] = fcmp oeq <2 x half> [[X]], zeroinitializer
 ; CHECK-NEXT:    [[CLASS:%.*]] = or <2 x i1> [[CMPZERO]], [[CMPINF]]
 ; CHECK-NEXT:    ret <2 x i1> [[CLASS]]
diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
index 15aa87f72c49a1..bdceeb5ef9785c 100644
--- a/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
+++ b/llvm/test/Transforms/InstCombine/ctlz-cttz-shifts.ll
@@ -64,7 +64,7 @@ define i32 @shl_cttz_true(i32) {
 define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
 ; CHECK-LABEL: define <2 x i32> @vec2_lshr_ctlz_true(
 ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CTLZ:%.*]] = add <2 x i32> [[TMP0]], splat (i32 9)
 ; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
 ;
   %div = lshr <2 x i32> <i32 8387584, i32 4276440>, %0
@@ -75,7 +75,7 @@ define <2 x i32> @vec2_lshr_ctlz_true(<2 x i32>) {
 define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) {
 ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_ctlz_true(
 ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]]
 ; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
 ;
   %shl = shl nuw <2 x i32> <i32 8387584, i32 4276440>, %0
@@ -86,7 +86,7 @@ define <2 x i32> @vec2_shl_nuw_ctlz_true(<2 x i32>) {
 define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(<2 x i32>) {
 ; CHECK-LABEL: define <2 x i32> @vec2_shl_nuw_nsw_ctlz_true(
 ; CHECK-SAME: <2 x i32> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> <i32 9, i32 9>, [[TMP0]]
+; CHECK-NEXT:    [[CTLZ:%.*]] = sub <2 x i32> splat (i32 9), [[TMP0]]
 ; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
 ;
   %shl = shl nuw nsw <2 x i32> <i32 8387584, i32 4276440>, %0
diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
index bcfbce8dfd3d22..4c5d7a7dc011ba 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
@@ -21,7 +21,7 @@ define i32 @ctpop1(i32 %0) {
 define <2 x i32> @ctpop1v(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop1v(
 ; CHECK-NEXT:    [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
-; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw <2 x i32> <i32 32, i32 32>, [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
index 17997b25d096c1..e86d32de51f367 100644
--- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll
@@ -102,7 +102,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483647>, [[X1]]
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 1, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
 ; CHECK-NEXT:    ret <2 x i32> [[CNT]]
@@ -116,7 +116,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_shl2_1_vec_nz(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %and = and <2 x i32> %x, <i32 15 ,i32 15>
   %shl = shl <2 x i32> <i32 2 ,i32 1>, %and
@@ -140,7 +140,7 @@ define <2 x i64> @ctpop_x_and_negx_vec(<2 x i64> %x) {
 
 define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %x1 = or <2 x i32> %x, <i32 1 ,i32 1>
   %sub = sub <2 x i32> <i32 0 ,i32 0>, %x1
diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll
index 940bb868c4c615..217a3c308a2ddb 100644
--- a/llvm/test/Transforms/InstCombine/ctpop.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop.ll
@@ -105,9 +105,9 @@ define i8 @mask_one_bit(i8 %x) {
 
 define <2 x i32> @mask_one_bit_splat(<2 x i32> %x, ptr %p) {
 ; CHECK-LABEL: @mask_one_bit_splat(
-; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[X:%.*]], <i32 2048, i32 2048>
+; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2048)
 ; CHECK-NEXT:    store <2 x i32> [[A]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i32> [[A]], <i32 11, i32 11>
+; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i32> [[A]], splat (i32 11)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = and <2 x i32> %x, <i32 2048, i32 2048>
@@ -146,7 +146,7 @@ define i7 @_parity_of_not_odd_type(i7 %x) {
 define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @_parity_of_not_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %neg = xor <2 x i32> %x, <i32 -1 ,i32 -1>
@@ -158,7 +158,7 @@ define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) {
 define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) {
 ; CHECK-LABEL: @_parity_of_not_poison(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %neg = xor <2 x i32> %x, <i32 poison ,i32 -1>
@@ -213,7 +213,7 @@ define i32 @ctpop_add_no_common_bits(i32 %a, i32 %b) {
 
 define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @ctpop_add_no_common_bits_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> <i32 16, i32 16>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[A:%.*]], <2 x i32> [[B:%.*]], <2 x i32> splat (i32 16))
 ; CHECK-NEXT:    [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i32> [[RES]]
 ;
@@ -227,9 +227,9 @@ define <2 x i32> @ctpop_add_no_common_bits_vec(<2 x i32> %a, <2 x i32> %b) {
 
 define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, ptr %p) {
 ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use(
-; CHECK-NEXT:    [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]])
-; CHECK-NEXT:    [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]])
 ; CHECK-NEXT:    store <2 x i32> [[CTPOP2]], ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]]
@@ -246,10 +246,10 @@ define <2 x i32> @ctpop_add_no_common_bits_vec_use(<2 x i32> %a, <2 x i32> %b, p
 
 define <2 x i32> @ctpop_add_no_common_bits_vec_use2(<2 x i32> %a, <2 x i32> %b, ptr %p) {
 ; CHECK-LABEL: @ctpop_add_no_common_bits_vec_use2(
-; CHECK-NEXT:    [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[SHL16:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[CTPOP1:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL16]])
 ; CHECK-NEXT:    store <2 x i32> [[CTPOP1]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[LSHL16:%.*]] = lshr <2 x i32> [[B:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[CTPOP2:%.*]] = tail call range(i32 0, 17) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[LSHL16]])
 ; CHECK-NEXT:    [[RES:%.*]] = add nuw nsw <2 x i32> [[CTPOP1]], [[CTPOP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[RES]]
@@ -321,7 +321,7 @@ define i8 @sub_ctpop_unknown(i8 %a, i8 %b)  {
 
 define <2 x i32> @sub_ctpop_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @sub_ctpop_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[RES:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i32> [[RES]]
 ;
@@ -334,7 +334,7 @@ define <2 x i32> @sub_ctpop_vec_extra_use(<2 x i32> %a, ptr %p) {
 ; CHECK-LABEL: @sub_ctpop_vec_extra_use(
 ; CHECK-NEXT:    [[CNT:%.*]] = tail call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[A:%.*]])
 ; CHECK-NEXT:    store <2 x i32> [[CNT]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[RES:%.*]] = sub nuw nsw <2 x i32> <i32 32, i32 32>, [[CNT]]
+; CHECK-NEXT:    [[RES:%.*]] = sub nuw nsw <2 x i32> splat (i32 32), [[CNT]]
 ; CHECK-NEXT:    ret <2 x i32> [[RES]]
 ;
   %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
@@ -412,7 +412,7 @@ define <2 x i32> @parity_xor_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @parity_xor_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[ARG1:%.*]], [[ARG:%.*]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP1]])
-; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT:    [[I4:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[I4]]
 ;
   %i = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %arg)
diff --git a/llvm/test/Transforms/InstCombine/dependent-ivs.ll b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
index e4a042ff5fe515..218e619643dfbb 100644
--- a/llvm/test/Transforms/InstCombine/dependent-ivs.ll
+++ b/llvm/test/Transforms/InstCombine/dependent-ivs.ll
@@ -167,7 +167,7 @@ define void @int_iv_vector(<2 x i64> %base) {
 ; CHECK-NEXT:    [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV2:%.*]] = add <2 x i64> [[IV]], [[BASE]]
 ; CHECK-NEXT:    call void @use.v2i64(<2 x i64> [[IV2]])
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], <i64 4, i64 4>
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4)
 ; CHECK-NEXT:    [[CMP:%.*]] = call i1 @get.i1()
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
@@ -198,7 +198,7 @@ define void @int_iv_vector_poison_invalid(<2 x i64> %base) {
 ; CHECK-NEXT:    [[IV2:%.*]] = phi <2 x i64> [ [[IV2_NEXT:%.*]], [[LOOP]] ], [ [[BASE]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ <i64 0, i64 poison>, [[ENTRY]] ]
 ; CHECK-NEXT:    call void @use.v2i64(<2 x i64> [[IV2]])
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], <i64 4, i64 4>
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4)
 ; CHECK-NEXT:    [[IV2_NEXT]] = add <2 x i64> [[IV_NEXT]], [[BASE]]
 ; CHECK-NEXT:    [[CMP:%.*]] = call i1 @get.i1()
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
@@ -670,7 +670,7 @@ define void @ptr_iv_vector2(<2 x ptr> %base) {
 ; CHECK-NEXT:    [[IV:%.*]] = phi <2 x i64> [ [[IV_NEXT:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[IV_PTR:%.*]] = getelementptr i8, <2 x ptr> [[BASE]], <2 x i64> [[IV]]
 ; CHECK-NEXT:    call void @use.v2p0(<2 x ptr> [[IV_PTR]])
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], <i64 4, i64 4>
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw <2 x i64> [[IV]], splat (i64 4)
 ; CHECK-NEXT:    [[CMP:%.*]] = call i1 @get.i1()
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
diff --git a/llvm/test/Transforms/InstCombine/div-shift.ll b/llvm/test/Transforms/InstCombine/div-shift.ll
index 9610746811a43a..3d58d8048bba4a 100644
--- a/llvm/test/Transforms/InstCombine/div-shift.ll
+++ b/llvm/test/Transforms/InstCombine/div-shift.ll
@@ -25,7 +25,7 @@ define i32 @t1(i16 zeroext %x, i32 %y) {
 define <2 x i32> @t1vec(<2 x i16> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t1vec(
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[D1:%.*]] = lshr <2 x i32> [[CONV]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[D1]]
 ;
@@ -236,7 +236,7 @@ define i32 @t10(i32 %x, i32 %y) {
 
 define <2 x i32> @t11(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t11(
-; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i32> splat (i32 1), [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shl = shl nsw <2 x i32> %x, %y
@@ -287,7 +287,7 @@ define i32 @t15(i32 %x, i32 %y) {
 
 define <2 x i32> @t16(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t16(
-; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i32> splat (i32 1), [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shl = shl nuw <2 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll
index e8a25ff44d0296..9db59b7ee57626 100644
--- a/llvm/test/Transforms/InstCombine/div.ll
+++ b/llvm/test/Transforms/InstCombine/div.ll
@@ -88,7 +88,7 @@ define i32 @udiv_by_minus1(i32 %A) {
 
 define <2 x i64> @udiv_by_minus1_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @udiv_by_minus1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[DIV]]
 ;
@@ -109,7 +109,7 @@ define i32 @udiv_by_sext_all_ones(i1 %x, i32 %y) {
 
 define <2 x i32> @udiv_by_sext_all_ones_vec(<2 x i1> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @udiv_by_sext_all_ones_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[Y:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[DIV]]
 ;
@@ -152,8 +152,8 @@ define i1 @test7(i32 %A) {
 
 define <2 x i1> @test7vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test7vec(
-; CHECK-NEXT:    [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], <i32 -20, i32 -20>
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], <i32 10, i32 10>
+; CHECK-NEXT:    [[A_OFF:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -20)
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A_OFF]], splat (i32 10)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = udiv <2 x i32> %A, <i32 10, i32 10>
@@ -174,7 +174,7 @@ define i1 @test8(i8 %A) {
 
 define <2 x i1> @test8vec(<2 x i8> %A) {
 ; CHECK-LABEL: @test8vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], <i8 -11, i8 -11>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 -11)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = udiv <2 x i8> %A, <i8 123, i8 123>
@@ -195,7 +195,7 @@ define i1 @test9(i8 %A) {
 
 define <2 x i1> @test9vec(<2 x i8> %A) {
 ; CHECK-LABEL: @test9vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], <i8 -10, i8 -10>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 -10)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = udiv <2 x i8> %A, <i8 123, i8 123>
@@ -266,7 +266,7 @@ define i32 @test15(i32 %a, i32 %b) {
 
 define <2 x i64> @test16(<2 x i64> %x) {
 ; CHECK-LABEL: @test16(
-; CHECK-NEXT:    [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], <i64 192, i64 192>
+; CHECK-NEXT:    [[DIV:%.*]] = udiv <2 x i64> [[X:%.*]], splat (i64 192)
 ; CHECK-NEXT:    ret <2 x i64> [[DIV]]
 ;
   %shr = lshr <2 x i64> %x, <i64 5, i64 5>
@@ -286,7 +286,7 @@ define i32 @test19(i32 %x) {
 
 define <2 x i32> @test19vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[A:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
@@ -309,8 +309,8 @@ define i32 @test20(i32 %x) {
 define <2 x i32> @test20vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test20vec(
 ; CHECK-NEXT:    [[X_FR:%.*]] = freeze <2 x i32> [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X_FR]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X_FR]], splat (i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3)
 ; CHECK-NEXT:    [[A:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[X_FR]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
@@ -442,7 +442,7 @@ define i32 @test32(i32 %a, i32 %b) {
 
 define <2 x i64> @test33(<2 x i64> %x) {
 ; CHECK-LABEL: @test33(
-; CHECK-NEXT:    [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], <i64 192, i64 192>
+; CHECK-NEXT:    [[DIV:%.*]] = udiv exact <2 x i64> [[X:%.*]], splat (i64 192)
 ; CHECK-NEXT:    ret <2 x i64> [[DIV]]
 ;
   %shr = lshr exact <2 x i64> %x, <i64 5, i64 5>
@@ -464,7 +464,7 @@ define i8 @sdiv_negated_dividend_constant_divisor(i8 %x) {
 
 define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat(
-; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[D:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %neg = sub nsw <2 x i8> zeroinitializer, %x
@@ -484,7 +484,7 @@ define i8 @sdiv_exact_negated_dividend_constant_divisor(i8 %x) {
 
 define <2 x i8> @sdiv_exact_negated_dividend_constant_divisor_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_exact_negated_dividend_constant_divisor_vec_splat(
-; CHECK-NEXT:    [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[D:%.*]] = sdiv exact <2 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %neg = sub nsw <2 x i8> zeroinitializer, %x
@@ -505,7 +505,7 @@ define i8 @sdiv_negated_dividend_constant_divisor_smin(i8 %x) {
 
 define <2 x i8> @sdiv_negated_dividend_constant_divisor_vec_splat_smin(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_negated_dividend_constant_divisor_vec_splat_smin(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -569,8 +569,8 @@ define i32 @test35(i32 %A) {
 
 define <2 x i32> @test35vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test35vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT:    [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
+; CHECK-NEXT:    [[MUL:%.*]] = udiv exact <2 x i32> [[AND]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
@@ -592,7 +592,7 @@ define i32 @test36(i32 %A) {
 
 define <2 x i32> @test36vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test36vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[MUL:%.*]] = lshr exact <2 x i32> [[AND]], [[A]]
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
@@ -657,7 +657,7 @@ define i32 @zap(i8 %x) {
 
 define <3 x i32> @shrink_vec(<3 x i8> %x) {
 ; CHECK-LABEL: @shrink_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], <i8 127, i8 127, i8 127>
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <3 x i8> [[X:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[DIV:%.*]] = sext <3 x i8> [[TMP1]] to <3 x i32>
 ; CHECK-NEXT:    ret <3 x i32> [[DIV]]
 ;
@@ -668,7 +668,7 @@ define <3 x i32> @shrink_vec(<3 x i8> %x) {
 
 define <2 x i32> @zap_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @zap_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[DIV]]
 ;
@@ -741,8 +741,8 @@ define i8 @div_factor_signed(i8 %x, i8 %y) {
 define <2 x i8> @div_factor_signed_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @div_factor_signed_vec(
 ; CHECK-NEXT:    [[Y_FR:%.*]] = freeze <2 x i8> [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], <i8 1, i8 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y_FR]], splat (i8 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 3)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[TMP2]], <2 x i8> [[Y_FR]], <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -768,7 +768,7 @@ define i8 @div_factor_unsigned(i8 %x, i8 %y) {
 
 define <2 x i8> @div_factor_unsigned_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @div_factor_unsigned_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[Y:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[R:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -875,7 +875,7 @@ define <2 x i64> @test_exact_vec(<2 x i64> %x) {
 
 define <2 x i8> @negate_sdiv_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @negate_sdiv_vec_splat(
-; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 -42, i8 -42>
+; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 -42)
 ; CHECK-NEXT:    ret <2 x i8> [[DIV_NEG]]
 ;
   %div = sdiv <2 x i8> %x, <i8 42, i8 42>
@@ -910,7 +910,7 @@ define <2 x i8> @negate_sdiv_vec_splat_one(<2 x i8> %x) {
 
 define <2 x i8> @negate_sdiv_vec_splat_signed_min(<2 x i8> %x) {
 ; CHECK-LABEL: @negate_sdiv_vec_splat_signed_min(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[DIV_NEG]]
 ;
@@ -1034,7 +1034,7 @@ define i8 @sdiv_by_int_min(i8 %x) {
 
 define <2 x i8> @sdiv_by_int_min_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_by_int_min_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -1052,7 +1052,7 @@ define <2 x i8> @sdiv_by_int_min_vec_splat_poison(<2 x i8> %x) {
 
 define <2 x i8> @sdiv_by_negconst_v2i8(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_by_negconst_v2i8(
-; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], <i8 108, i8 108>
+; CHECK-NEXT:    [[DIV_NEG:%.*]] = sdiv <2 x i8> [[X:%.*]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[DIV_NEG]]
 ;
   %div = sdiv <2 x i8> %x, <i8 -108, i8 -108>
@@ -1072,7 +1072,7 @@ define <vscale x 2 x i8> @sdiv_by_negconst_nxv2i8(<vscale x 2 x i8> %x) {
 
 define <2 x i8> @sdiv_by_minSigned_v2i8(<2 x i8> %x) {
 ; CHECK-LABEL: @sdiv_by_minSigned_v2i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[DIV_NEG]]
 ;
@@ -1152,7 +1152,7 @@ define i32 @sdiv_constant_dividend_select_divisor2(i1 %b, i32 %x) {
 
 define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {
 ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec(
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 3, i8 8>, <2 x i8> <i8 -10, i8 -10>
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], <2 x i8> <i8 3, i8 8>, <2 x i8> splat (i8 -10)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %s = select i1 %b, <2 x i8> <i8 12, i8 -5>, <2 x i8> <i8 -4, i8 4>
@@ -1164,7 +1164,7 @@ define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec(i1 %b) {
 
 define <2 x i8> @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(i1 %b) {
 ; CHECK-LABEL: @sdiv_constant_dividend_select_of_constants_divisor_vec_ub1(
-; CHECK-NEXT:    ret <2 x i8> <i8 -10, i8 -10>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -10)
 ;
   %s = select i1 %b, <2 x i8> <i8 0, i8 -5>, <2 x i8> <i8 -4, i8 4>
   %r = sdiv <2 x i8> <i8 42, i8 -42>, %s
diff --git a/llvm/test/Transforms/InstCombine/eq-of-parts.ll b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
index 217e37b8593394..5cb1f1c3c28ddf 100644
--- a/llvm/test/Transforms/InstCombine/eq-of-parts.ll
+++ b/llvm/test/Transforms/InstCombine/eq-of-parts.ll
@@ -179,9 +179,9 @@ define i1 @eq_21_comm_eq2(i32 %x, i32 %y) {
 
 define <2x i1> @eq_21_vector(<2x i32> %x, <2x i32> %y) {
 ; CHECK-LABEL: @eq_21_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16>
 ; CHECK-NEXT:    [[C_210:%.*]] = icmp eq <2 x i16> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    ret <2 x i1> [[C_210]]
@@ -835,9 +835,9 @@ define i1 @ne_21_comm_ne2(i32 %x, i32 %y) {
 
 define <2x i1> @ne_21_vector(<2x i32> %x, <2x i32> %y) {
 ; CHECK-LABEL: @ne_21_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[Y:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16>
 ; CHECK-NEXT:    [[C_210:%.*]] = icmp ne <2 x i16> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    ret <2 x i1> [[C_210]]
diff --git a/llvm/test/Transforms/InstCombine/exact.ll b/llvm/test/Transforms/InstCombine/exact.ll
index ccad6f974ffb1e..c7377ab17d540e 100644
--- a/llvm/test/Transforms/InstCombine/exact.ll
+++ b/llvm/test/Transforms/InstCombine/exact.ll
@@ -21,7 +21,7 @@ define i32 @sdiv2(i32 %x) {
 
 define <2 x i32> @sdiv2_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @sdiv2_vec(
-; CHECK-NEXT:    [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[Y:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i32> [[Y]]
 ;
   %y = sdiv exact <2 x i32> %x, <i32 128, i32 128>
@@ -105,8 +105,8 @@ define i64 @ashr1(i64 %X) {
 
 define <2 x i64> @ashr1_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @ashr1_vec(
-; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[X:%.*]], <i64 8, i64 8>
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 2, i64 2>
+; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[X:%.*]], splat (i64 8)
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 2)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %A = shl <2 x i64> %X, <i64 8, i64 8>
@@ -137,7 +137,7 @@ define i1 @ashr_icmp2(i64 %X) {
 
 define <2 x i1> @ashr_icmp2_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @ashr_icmp2_vec(
-; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], <i64 16, i64 16>
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i64> [[X:%.*]], splat (i64 16)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %Y = ashr exact <2 x i64> %X, <i64 2, i64 2>
@@ -163,10 +163,10 @@ define i1 @pr9998(i32 %V) {
 ; FIXME: Vectors should fold the same way.
 define <2 x i1> @pr9998vec(<2 x i32> %V) {
 ; CHECK-LABEL: @pr9998vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[X:%.*]] = sub nsw <2 x i32> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    [[Y:%.*]] = sext <2 x i32> [[X]] to <2 x i64>
-; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], <i64 7297771788697658747, i64 7297771788697658747>
+; CHECK-NEXT:    [[Z:%.*]] = icmp ugt <2 x i64> [[Y]], splat (i64 7297771788697658747)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %W = shl <2 x i32> %V, <i32 31, i32 31>
@@ -248,7 +248,7 @@ define i1 @sdiv_icmp2(i64 %X) {
 
 define <2 x i1> @sdiv_icmp2_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @sdiv_icmp2_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 5, i64 5>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
@@ -268,7 +268,7 @@ define i1 @sdiv_icmp3(i64 %X) {
 
 define <2 x i1> @sdiv_icmp3_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @sdiv_icmp3_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 -5, i64 -5>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %A = sdiv exact <2 x i64> %X, <i64 5, i64 5>
@@ -308,7 +308,7 @@ define i1 @sdiv_icmp5(i64 %X) {
 
 define <2 x i1> @sdiv_icmp5_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @sdiv_icmp5_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 -5, i64 -5>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
@@ -328,7 +328,7 @@ define i1 @sdiv_icmp6(i64 %X) {
 
 define <2 x i1> @sdiv_icmp6_vec(<2 x i64> %X) {
 ; CHECK-LABEL: @sdiv_icmp6_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 5, i64 5>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 5)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %A = sdiv exact <2 x i64> %X, <i64 -5, i64 -5>
@@ -402,7 +402,7 @@ define i8 @mul_of_sdiv_fail_ub(i8 %x) {
 define <2 x i8> @mul_of_sdiv_fail_ub_non_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @mul_of_sdiv_fail_ub_non_splat(
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv exact <2 x i8> [[X:%.*]], <i8 -6, i8 -12>
-; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i8> [[DIV]], <i8 6, i8 6>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i8> [[DIV]], splat (i8 6)
 ; CHECK-NEXT:    ret <2 x i8> [[MUL]]
 ;
   %div = sdiv exact <2 x i8> %x, <i8 -6, i8 -12>
diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
index d8bd0a4d8159db..0502540b7f7e92 100644
--- a/llvm/test/Transforms/InstCombine/exp2-1.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -309,22 +309,22 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) {
 define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) {
 ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; LDEXP32-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
-; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]])
 ; LDEXP32-NEXT:    ret <2 x float> [[R]]
 ;
 ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; LDEXP16-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
-; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i16> [[TMP1]])
+; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> splat (float 1.000000e+00), <2 x i16> [[TMP1]])
 ; LDEXP16-NEXT:    ret <2 x float> [[R]]
 ;
 ; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; NOLDEXPF-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
-; NOLDEXPF-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; NOLDEXPF-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]])
 ; NOLDEXPF-NEXT:    ret <2 x float> [[R]]
 ;
 ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; NOLDEXP-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
-; NOLDEXP-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; NOLDEXP-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]])
 ; NOLDEXP-NEXT:    ret <2 x float> [[R]]
 ;
   %s = sitofp <2 x i8> %x to <2 x float>
diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
index 969020140cb339..c2c7ece8483a9e 100644
--- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
@@ -30,7 +30,7 @@ define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i8> %x to <2 x float>
diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
index 4e49ff159f875d..9d28cae8f04d63 100644
--- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll
@@ -19,7 +19,7 @@ define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl
 ; CHECK-LABEL: define <2 x i32> @fabs_as_int_v2f32_noimplicitfloat
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[BC]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[BC]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <2 x i32> [[AND]]
 ;
   %bc = bitcast <2 x float> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/fabs-copysign.ll b/llvm/test/Transforms/InstCombine/fabs-copysign.ll
index 438760b76f2039..e1d9f8bf2a4b98 100644
--- a/llvm/test/Transforms/InstCombine/fabs-copysign.ll
+++ b/llvm/test/Transforms/InstCombine/fabs-copysign.ll
@@ -31,7 +31,7 @@ define double @fabs_copysign_commuted(double %x) {
 
 define <4 x double> @fabs_copysign_vec(<4 x double> %x) {
 ; CHECK-LABEL: @fabs_copysign_vec(
-; CHECK-NEXT:    [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> [[X:%.*]])
+; CHECK-NEXT:    [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <4 x double> [[DIV]]
 ;
   %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
@@ -41,7 +41,7 @@ define <4 x double> @fabs_copysign_vec(<4 x double> %x) {
 
 define <4 x double> @fabs_copysign_vec_commuted(<4 x double> %x) {
 ; CHECK-LABEL: @fabs_copysign_vec_commuted(
-; CHECK-NEXT:    [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> [[X:%.*]])
+; CHECK-NEXT:    [[DIV:%.*]] = call nnan ninf <4 x double> @llvm.copysign.v4f64(<4 x double> splat (double 1.000000e+00), <4 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <4 x double> [[DIV]]
 ;
   %f = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x)
diff --git a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll
index a1988e5d1638d7..dd8d0aed3210e1 100644
--- a/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll
+++ b/llvm/test/Transforms/InstCombine/fabs-fneg-fold.ll
@@ -137,7 +137,7 @@ define float @fabs_fneg_no_fabs(float %x) {
 define <2 x float> @fabs_fneg_splat_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @fabs_fneg_splat_v2f32(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 2.000000e+00)
 ;
   %neg = fneg <2 x float> <float -2.0, float -2.0>
   %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %neg)
diff --git a/llvm/test/Transforms/InstCombine/fadd.ll b/llvm/test/Transforms/InstCombine/fadd.ll
index 840ccaef1086ab..36c387969d05d6 100644
--- a/llvm/test/Transforms/InstCombine/fadd.ll
+++ b/llvm/test/Transforms/InstCombine/fadd.ll
@@ -478,7 +478,7 @@ define float @fadd_fmul_common_op(float %x) {
 
 define <2 x float> @fadd_fmul_common_op_vec(<2 x float> %x) {
 ; CHECK-LABEL: @fadd_fmul_common_op_vec(
-; CHECK-NEXT:    [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], <float 4.300000e+01, float 4.300000e+01>
+; CHECK-NEXT:    [[A:%.*]] = fmul reassoc nsz <2 x float> [[X:%.*]], splat (float 4.300000e+01)
 ; CHECK-NEXT:    ret <2 x float> [[A]]
 ;
   %m = fmul reassoc nsz <2 x float> %x, <float 42.0, float 42.0>
diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll
index 32f136d53fab4b..5a80cf56712035 100644
--- a/llvm/test/Transforms/InstCombine/fast-math.ll
+++ b/llvm/test/Transforms/InstCombine/fast-math.ll
@@ -587,7 +587,7 @@ define float @fdiv2(float %x) {
 
 define <2 x float> @fdiv2_vec(<2 x float> %x) {
 ; CHECK-LABEL: @fdiv2_vec(
-; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[DIV1]]
 ;
   %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
diff --git a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll
index 10a3ccf3cdb48a..27327061da5c0e 100644
--- a/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp-range-check-idiom.ll
@@ -186,7 +186,7 @@ define <2 x i1> @test_and_olt_poison(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x i1> @test_and_olt_poison(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]])
-; CHECK-NEXT:    [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], <float 0x3C00000000000000, float 0x3C00000000000000>
+; CHECK-NEXT:    [[COND:%.*]] = fcmp olt <2 x float> [[TMP1]], splat (float 0x3C00000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[COND]]
 ;
   %cmp1 = fcmp olt <2 x float> %x, <float 0x3C00000000000000, float poison>
diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll
index 0d45baddcb2fc7..119cffd73c662c 100644
--- a/llvm/test/Transforms/InstCombine/fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/fcmp.ll
@@ -32,7 +32,7 @@ define i1 @fpext_constant(float %a) {
 
 define <2 x i1> @fpext_constant_vec_splat(<2 x half> %a) {
 ; CHECK-LABEL: @fpext_constant_vec_splat(
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], <half 0xH5140, half 0xH5140>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp nnan ole <2 x half> [[A:%.*]], splat (half 0xH5140)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %ext = fpext <2 x half> %a to <2 x double>
@@ -780,7 +780,7 @@ define i1 @lossy_une(half %x) {
 define <2 x i1> @lossy_ogt(<2 x float> %x) {
 ; CHECK-LABEL: @lossy_ogt(
 ; CHECK-NEXT:    [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double>
-; CHECK-NEXT:    [[R:%.*]] = fcmp ogt <2 x double> [[E]], <double 1.000000e-01, double 1.000000e-01>
+; CHECK-NEXT:    [[R:%.*]] = fcmp ogt <2 x double> [[E]], splat (double 1.000000e-01)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %e = fpext <2 x float> %x to <2 x double>
@@ -826,7 +826,7 @@ define i1 @lossy_ole(half %x) {
 define <2 x i1> @lossy_ugt(<2 x float> %x) {
 ; CHECK-LABEL: @lossy_ugt(
 ; CHECK-NEXT:    [[E:%.*]] = fpext <2 x float> [[X:%.*]] to <2 x double>
-; CHECK-NEXT:    [[R:%.*]] = fcmp ugt <2 x double> [[E]], <double 1.000000e-01, double 1.000000e-01>
+; CHECK-NEXT:    [[R:%.*]] = fcmp ugt <2 x double> [[E]], splat (double 1.000000e-01)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %e = fpext <2 x float> %x to <2 x double>
@@ -1222,7 +1222,7 @@ define i1 @bitcast_eq0(i32 %x) {
 
 define <2 x i1> @bitcast_ne0(<2 x i32> %x) {
 ; CHECK-LABEL: @bitcast_ne0(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll
index 12d6e6463de657..06c78a8c6206dc 100644
--- a/llvm/test/Transforms/InstCombine/fdiv.ll
+++ b/llvm/test/Transforms/InstCombine/fdiv.ll
@@ -78,7 +78,7 @@ define float @not_exact_but_allow_recip_but_denorm(float %x) {
 
 define <2 x float> @exact_inverse_splat(<2 x float> %x) {
 ; CHECK-LABEL: @exact_inverse_splat(
-; CHECK-NEXT:    [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], <float 2.500000e-01, float 2.500000e-01>
+; CHECK-NEXT:    [[DIV:%.*]] = fmul <2 x float> [[X:%.*]], splat (float 2.500000e-01)
 ; CHECK-NEXT:    ret <2 x float> [[DIV]]
 ;
   %div = fdiv <2 x float> %x, <float 4.0, float 4.0>
@@ -98,7 +98,7 @@ define <vscale x 2 x float> @exact_inverse_scalable_splat(<vscale x 2 x float> %
 
 define <2 x float> @not_exact_but_allow_recip_splat(<2 x float> %x) {
 ; CHECK-LABEL: @not_exact_but_allow_recip_splat(
-; CHECK-NEXT:    [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], <float 0x3FD5555560000000, float 0x3FD5555560000000>
+; CHECK-NEXT:    [[DIV:%.*]] = fmul arcp <2 x float> [[X:%.*]], splat (float 0x3FD5555560000000)
 ; CHECK-NEXT:    ret <2 x float> [[DIV]]
 ;
   %div = fdiv arcp <2 x float> %x, <float 3.0, float 3.0>
@@ -116,7 +116,7 @@ define <2 x float> @exact_inverse_vec(<2 x float> %x) {
 
 define <2 x float> @not_exact_inverse_splat(<2 x float> %x) {
 ; CHECK-LABEL: @not_exact_inverse_splat(
-; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[DIV]]
 ;
   %div = fdiv <2 x float> %x, <float 3.0, float 3.0>
@@ -465,7 +465,7 @@ define float @div_factor_too_strict(float %x, float %y) {
 
 define <2 x float> @div_factor_commute(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @div_factor_commute(
-; CHECK-NEXT:    [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[Y:%.*]]
+; CHECK-NEXT:    [[D:%.*]] = fdiv reassoc nnan ninf nsz <2 x float> splat (float 1.000000e+00), [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x float> [[D]]
 ;
   %m = fmul <2 x float> %y, %x
@@ -959,7 +959,7 @@ define float @fdiv_nnan_zero_f32(float %x) {
 
 define <2 x float> @fdiv_nnan_zero_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @fdiv_nnan_zero_v2f32(
-; CHECK-NEXT:    [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[FDIV]]
 ;
   %fdiv = fdiv nnan <2 x float> %x, zeroinitializer
@@ -977,7 +977,7 @@ define float @fdiv_nnan_zero_f32_fmf(float %x) {
 
 define <2 x float> @fdiv_nnan_zero_v2f32_fmf(<2 x float> %x) {
 ; CHECK-LABEL: @fdiv_nnan_zero_v2f32_fmf(
-; CHECK-NEXT:    [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan nsz <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 0x7FF0000000000000), <2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[FDIV]]
 ;
   %fdiv = fdiv nnan nsz <2 x float> %x, zeroinitializer
@@ -1031,7 +1031,7 @@ define double @test_negative_zero(double %X) {
 
 define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) {
 ; CHECK-LABEL: @test_positive_zero_vector_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = fdiv nnan nsz <2 x double> %X, <double 0.0, double 0.0>
@@ -1040,7 +1040,7 @@ define <2 x double> @test_positive_zero_vector_nsz(<2 x double> %X) {
 
 define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) {
 ; CHECK-LABEL: @test_negative_zero_vector_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan nsz <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = fdiv nnan nsz <2 x double> %X, <double -0.0, double 0.0>
@@ -1049,7 +1049,7 @@ define <2 x double> @test_negative_zero_vector_nsz(<2 x double> %X) {
 
 define <2 x double> @test_positive_zero_vector(<2 x double> %X) {
 ; CHECK-LABEL: @test_positive_zero_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 0x7FF0000000000000), <2 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
   %1 = fdiv nnan <2 x double> %X, <double 0.0, double 0.0>
diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll
index 4590bbb7abaeb8..ae0067d41426cf 100644
--- a/llvm/test/Transforms/InstCombine/fma.ll
+++ b/llvm/test/Transforms/InstCombine/fma.ll
@@ -587,7 +587,7 @@ define <2 x double> @fma_const_fmul_one2(<2 x double> %b) {
 
 define <2 x double> @fma_nan_and_const_0(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
@@ -595,7 +595,7 @@ define <2 x double> @fma_nan_and_const_0(<2 x double> %b) {
 
 define <2 x double> @fma_nan_and_const_1(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b)
   ret <2 x double> %res
@@ -603,7 +603,7 @@ define <2 x double> @fma_nan_and_const_1(<2 x double> %b) {
 
 define <2 x double> @fma_nan_and_const_2(<2 x double> %b) {
 ; CHECK-LABEL: @fma_nan_and_const_2(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
@@ -611,7 +611,7 @@ define <2 x double> @fma_nan_and_const_2(<2 x double> %b) {
 
 define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double undef, double undef>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
@@ -619,7 +619,7 @@ define <2 x double> @fma_undef_0(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> <double undef, double undef>, <2 x double> %c)
   ret <2 x double> %res
@@ -627,7 +627,7 @@ define <2 x double> @fma_undef_1(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fma_undef_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_undef_2(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double undef, double undef>)
   ret <2 x double> %res
@@ -663,14 +663,14 @@ define <2 x double> @fma_partial_undef_2(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fma_nan_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
 }
 define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %c)
   ret <2 x double> %res
@@ -678,7 +678,7 @@ define <2 x double> @fma_nan_1(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fma_nan_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fma_nan_2(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fma.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
@@ -695,7 +695,7 @@ define <2 x double> @fmuladd_const_fmul(<2 x double> %b) {
 
 define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b)
   ret <2 x double> %res
@@ -703,7 +703,7 @@ define <2 x double> @fmuladd_nan_and_const_0(<2 x double> %b) {
 
 define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b)
   ret <2 x double> %res
@@ -711,7 +711,7 @@ define <2 x double> @fmuladd_nan_and_const_1(<2 x double> %b) {
 
 define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) {
 ; CHECK-LABEL: @fmuladd_nan_and_const_2(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0.0000000129182, double 0.000009123>, <2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
@@ -719,7 +719,7 @@ define <2 x double> @fmuladd_nan_and_const_2(<2 x double> %b) {
 
 define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_nan_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
@@ -727,7 +727,7 @@ define <2 x double> @fmuladd_nan_0(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_nan_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> %c)
   ret <2 x double> %res
@@ -735,7 +735,7 @@ define <2 x double> @fmuladd_nan_1(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_0(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> <double undef, double undef>, <2 x double> %b, <2 x double> %c)
   ret <2 x double> %res
@@ -743,7 +743,7 @@ define <2 x double> @fmuladd_undef_0(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_1(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> <double undef, double undef>, <2 x double> %c)
   ret <2 x double> %res
@@ -751,7 +751,7 @@ define <2 x double> @fmuladd_undef_1(<2 x double> %b, <2 x double> %c) {
 
 define <2 x double> @fmuladd_undef_2(<2 x double> %b, <2 x double> %c) {
 ; CHECK-LABEL: @fmuladd_undef_2(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF8000000000000)
 ;
   %res = call nnan nsz <2 x double> @llvm.fmuladd.v2f64(<2 x double> %b, <2 x double> %c, <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>)
   ret <2 x double> %res
diff --git a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll
index 716781564ca70f..3c598f91fad410 100644
--- a/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/fmul-inseltpoison.ll
@@ -10,7 +10,7 @@ define void @test8(ptr %inout, i1 %c1) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ]
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2
diff --git a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
index 72ac2f18f113a4..2166db8f5c4e59 100644
--- a/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/fmul-sqrt.ll
@@ -118,7 +118,7 @@ define <2 x float> @x_add_y_rsqrt_reassociate_extra_use(<2 x float> %x, <2 x flo
 ; CHECK-LABEL: @x_add_y_rsqrt_reassociate_extra_use(
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd fast <2 x float> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[SQRT:%.*]] = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ADD]])
-; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[SQRT]]
+; CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[SQRT]]
 ; CHECK-NEXT:    [[RES:%.*]] = fdiv fast <2 x float> [[ADD]], [[SQRT]]
 ; CHECK-NEXT:    store <2 x float> [[RSQRT]], ptr [[P:%.*]], align 8
 ; CHECK-NEXT:    ret <2 x float> [[RES]]
diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll
index 4554b4ed8844de..51b70ef7e98004 100644
--- a/llvm/test/Transforms/InstCombine/fmul.ll
+++ b/llvm/test/Transforms/InstCombine/fmul.ll
@@ -377,7 +377,7 @@ define void @test8(ptr %inout, i1 %c1) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[LOCAL_VAR_7_0:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[FOR_BODY:%.*]] ]
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[TMP0]] = insertelement <4 x float> [[LOCAL_VAR_7_0]], float 0.000000e+00, i64 2
@@ -821,8 +821,8 @@ define float @fmul_fadd_distribute(float %x) {
 
 define <2 x float> @fmul_fadd_distribute_vec(<2 x float> %x) {
 ; CHECK-LABEL: @fmul_fadd_distribute_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 6.000000e+03, float 6.000000e+03>
-; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], <float 1.200000e+07, float 1.200000e+07>
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 6.000000e+03)
+; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], splat (float 1.200000e+07)
 ; CHECK-NEXT:    ret <2 x float> [[T3]]
 ;
   %t1 = fadd reassoc <2 x float> <float 2.0e+3, float 2.0e+3>, %x
@@ -1164,7 +1164,7 @@ define float @negate_if_false(float %x, i1 %cond) {
 
 define <2 x double> @negate_if_true_commute(<2 x double> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_commute(
-; CHECK-NEXT:    [[X:%.*]] = fdiv <2 x double> <double 4.200000e+01, double 4.200000e+01>, [[PX:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fneg ninf <2 x double> [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select ninf i1 [[COND:%.*]], <2 x double> [[TMP1]], <2 x double> [[X]]
 ; CHECK-NEXT:    ret <2 x double> [[R]]
@@ -1207,8 +1207,8 @@ define float @negate_if_true_extra_use(float %x, i1 %cond) {
 
 define <2 x double> @negate_if_true_wrong_constant(<2 x double> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_wrong_constant(
-; CHECK-NEXT:    [[X:%.*]] = fdiv <2 x double> <double 4.200000e+01, double 4.200000e+01>, [[PX:%.*]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> <double -1.000000e+00, double 0.000000e+00>, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[X:%.*]] = fdiv <2 x double> splat (double 4.200000e+01), [[PX:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x double> <double -1.000000e+00, double 0.000000e+00>, <2 x double> splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[R:%.*]] = fmul <2 x double> [[X]], [[SEL]]
 ; CHECK-NEXT:    ret <2 x double> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll
index e3067b0d024614..f8d88b4f238f27 100644
--- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fneg-as-int.ll
@@ -19,7 +19,7 @@ define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimplicitfl
 ; CHECK-LABEL: define <2 x i32> @fneg_as_int_v2f32_noimplicitfloat
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[BC]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[BC]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i32> [[XOR]]
 ;
   %bc = bitcast <2 x float> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
index 8c3e6958fe083e..8b245bdd792993 100644
--- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
+++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll
@@ -17,7 +17,7 @@ define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat(<2 x float> %x) noimpli
 ; CHECK-LABEL: define <2 x i32> @fneg_fabs_as_int_v2f32_noimplicitfloat
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x float> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[BC]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[BC]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i32> [[OR]]
 ;
   %bc = bitcast <2 x float> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
index 3461b19d0d7002..f28262b2a77e04 100644
--- a/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
+++ b/llvm/test/Transforms/InstCombine/fold-bin-operand.ll
@@ -47,7 +47,7 @@ define <4 x float> @h1(i1 %A, <4 x i32> %B) {
 ; CHECK-LABEL: @h1(
 ; CHECK-NEXT:  EntryBlock:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> <float 0x36A0000000000000, float 0x36A0000000000000, float 0x36A0000000000000, float 0x36A0000000000000>, <4 x float> [[TMP0]]
+; CHECK-NEXT:    [[BC:%.*]] = select i1 [[A:%.*]], <4 x float> splat (float 0x36A0000000000000), <4 x float> [[TMP0]]
 ; CHECK-NEXT:    ret <4 x float> [[BC]]
 ;
 EntryBlock:
diff --git a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
index 4626d19bd2899d..6b114d46fdbc6d 100644
--- a/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
+++ b/llvm/test/Transforms/InstCombine/fold-ctpop-of-not.ll
@@ -144,7 +144,7 @@ define <2 x i1> @fold_cmp_ne_ctpop_c(<2 x i8> %x) {
 
 define <2 x i1> @fold_cmp_ne_ctpop_var_fail(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @fold_cmp_ne_ctpop_var_fail(
-; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[CNT]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -214,7 +214,7 @@ define <2 x i1> @fold_cmp_ugt_ctpop_c(<2 x i8> %x) {
 
 define <2 x i1> @fold_cmp_ugt_ctpop_c_out_of_range_fail(<2 x i8> %x) {
 ; CHECK-LABEL: @fold_cmp_ugt_ctpop_c_out_of_range_fail(
-; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[NX:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[NX]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[CNT]], <i8 2, i8 10>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
index 1c28b151825c12..7dc4f5219b4382 100644
--- a/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
+++ b/llvm/test/Transforms/InstCombine/fold-select-fmul-if-zero.ll
@@ -63,7 +63,7 @@ define float @ldexp_by_5_if_0_oeq_zero_f32(float %x) {
 define <2 x float> @ldexp_by_5_if_0_oeq_zero_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @ldexp_by_5_if_0_oeq_zero_v2f32(
 ; CHECK-NEXT:    [[X_IS_ZERO:%.*]] = fcmp oeq <2 x float> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> <i32 5, i32 5>)
+; CHECK-NEXT:    [[SCALED_X:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 5))
 ; CHECK-NEXT:    [[SCALED_IF_DENORMAL:%.*]] = select <2 x i1> [[X_IS_ZERO]], <2 x float> [[SCALED_X]], <2 x float> [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[SCALED_IF_DENORMAL]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll
index 5567d7d5e1fca9..0c7ca0c4fd0059 100644
--- a/llvm/test/Transforms/InstCombine/fold-select-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/fold-select-trunc.ll
@@ -59,7 +59,7 @@ define i8 @fold_select_trunc_negative(i8 %x, i8 %y) {
 define <2 x i8> @fold_select_trunc_vector(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @fold_select_trunc_vector(
 ; CHECK-NEXT:    [[TRUNC:%.*]] = trunc nuw <2 x i8> [[X:%.*]] to <2 x i1>
-; CHECK-NEXT:    [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> <i8 1, i8 1>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = select <2 x i1> [[TRUNC]], <2 x i8> splat (i8 1), <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
   %trunc = trunc nuw <2 x i8> %x to <2 x i1>
diff --git a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll
index a5c7cb3306ed08..be02eb6dd33463 100644
--- a/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll
+++ b/llvm/test/Transforms/InstCombine/fold-signbit-test-power2.ll
@@ -38,8 +38,8 @@ define i1 @pow2_or_zero_is_negative_commute(i8 %A) {
 
 define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @pow2_or_zero_is_negative_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -128)
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    call void @use_i1_vec(<2 x i1> [[CMP_2]])
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -53,8 +53,8 @@ define <2 x i1> @pow2_or_zero_is_negative_vec(<2 x i8> %x) {
 
 define <2 x i1> @pow2_or_zero_is_negative_vec_commute(<2 x i8> %A) {
 ; CHECK-LABEL: @pow2_or_zero_is_negative_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = mul <2 x i8> <i8 42, i8 42>, %A ; thwart complexity-based canonicalization
@@ -94,8 +94,8 @@ define i1 @pow2_or_zero_is_not_negative_commute(i8 %A) {
 
 define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
-; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -128)
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    call void @use_i1_vec(<2 x i1> [[CMP_2]])
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -109,8 +109,8 @@ define <2 x i1> @pow2_or_zero_is_not_negative_vec(<2 x i8> %x) {
 
 define <2 x i1> @pow2_or_zero_is_not_negative_vec_commute(<2 x i8> %A) {
 ; CHECK-LABEL: @pow2_or_zero_is_not_negative_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[X:%.*]] = mul <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %x = mul <2 x i8> <i8 42, i8 42>, %A ; thwart complexity-based canonicalization
diff --git a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll
index af580ba57513c2..3bd341ffafb58b 100644
--- a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll
+++ b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll
@@ -41,7 +41,7 @@ define i8 @p0_scalar_not_truly_negatable(i8 %x, i8 %y) {
 
 define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p1_vector_splat(
-; CHECK-NEXT:    [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
@@ -52,7 +52,7 @@ define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @p2_vector_poison(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p2_vector_poison(
-; CHECK-NEXT:    [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[T1]]
 ;
@@ -109,7 +109,7 @@ define i32 @n5_is_not_not(i32 %x, i32 %y) {
 
 define <2 x i32> @n5_is_not_not_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @n5_is_not_not_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i32> [[X:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[T1:%.*]] = sub <2 x i32> [[Y:%.*]], [[T0]]
 ; CHECK-NEXT:    ret <2 x i32> [[T1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll
index 4034759df06048..66970a9d48ddf1 100644
--- a/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll
+++ b/llvm/test/Transforms/InstCombine/fpclass-check-idioms.ll
@@ -253,7 +253,7 @@ define <2 x i1> @f32_fcnan_fcinf_vec(<2 x float> %a) {
 ; CHECK-LABEL: define <2 x i1> @f32_fcnan_fcinf_vec(
 ; CHECK-SAME: <2 x float> [[A:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ueq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %i32 = bitcast <2 x float> %a to <2 x i32>
@@ -278,7 +278,7 @@ define <2 x i1> @f32_fcinf_vec(<2 x float> %a) {
 ; CHECK-LABEL: define <2 x i1> @f32_fcinf_vec(
 ; CHECK-SAME: <2 x float> [[A:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[A]])
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %i32 = bitcast <2 x float> %a to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll
index f1fba6cb272f9f..65e29a8dc43f15 100644
--- a/llvm/test/Transforms/InstCombine/fsh.ll
+++ b/llvm/test/Transforms/InstCombine/fsh.ll
@@ -241,7 +241,7 @@ define <2 x i31> @fshl_op1_undef_splat_vec(<2 x i31> %x) {
 
 define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @fshr_op0_undef_splat_vec(
-; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> undef, <2 x i32> %x, <2 x i32> <i32 7, i32 7>)
@@ -250,7 +250,7 @@ define <2 x i32> @fshr_op0_undef_splat_vec(<2 x i32> %x) {
 
 define <2 x i32> @fshr_op1_zero_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @fshr_op1_zero_splat_vec(
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], <i32 25, i32 25>
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 25)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %r = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> zeroinitializer, <2 x i32> <i32 7, i32 7>)
@@ -864,7 +864,7 @@ define <2 x i31> @fshr_mask_args_same_vector(<2 x i31> %a) {
 define <2 x i32> @fshr_mask_args_same_vector2(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @fshr_mask_args_same_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1000000, i32 100000>
-; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[T3:%.*]] = lshr exact <2 x i32> [[T1]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t1 = and <2 x i32> %a, <i32 1000000, i32 100000>
diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll
index fa0d59b2269983..1009d47817163f 100644
--- a/llvm/test/Transforms/InstCombine/funnel.ll
+++ b/llvm/test/Transforms/InstCombine/funnel.ll
@@ -34,7 +34,7 @@ define i42 @fshr_i42_constant(i42 %x, i42 %y) {
 
 define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @fshl_v2i16_constant_splat(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -45,7 +45,7 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) {
 
 define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison0(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 poison, i16 1>
@@ -56,7 +56,7 @@ define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y)
 
 define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @fshl_v2i16_constant_splat_poison1(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -481,11 +481,11 @@ define i32 @fshl_concat_unknown_source(i32 %zext.x, i32 %zext.y, ptr %addr) {
 define <2 x i32> @fshl_concat_vector(<2 x i8> %x, <2 x i24> %y, ptr %addr) {
 ; CHECK-LABEL: @fshl_concat_vector(
 ; CHECK-NEXT:    [[ZEXT_X:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], <i32 24, i32 24>
+; CHECK-NEXT:    [[SLX:%.*]] = shl nuw <2 x i32> [[ZEXT_X]], splat (i32 24)
 ; CHECK-NEXT:    [[ZEXT_Y:%.*]] = zext <2 x i24> [[Y:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[XY:%.*]] = or disjoint <2 x i32> [[SLX]], [[ZEXT_Y]]
 ; CHECK-NEXT:    store <2 x i32> [[XY]], ptr [[ADDR:%.*]], align 4
-; CHECK-NEXT:    [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> <i32 8, i32 8>)
+; CHECK-NEXT:    [[YX:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[XY]], <2 x i32> [[XY]], <2 x i32> splat (i32 8))
 ; CHECK-NEXT:    ret <2 x i32> [[YX]]
 ;
   %zext.x = zext <2 x i8> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
index b2bc1abeaba568..883e8b784b4cf6 100644
--- a/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
+++ b/llvm/test/Transforms/InstCombine/gep-combine-loop-invariant.ll
@@ -97,8 +97,8 @@ define void @PR37005(ptr %base, ptr %in) {
 ; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48
 ; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> <i64 0, i64 1>
 ; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64>
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], <i64 14, i64 14>
-; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], <i64 1125899906842496, i64 1125899906842496>
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14)
+; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496)
 ; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], <2 x i64> [[SL1]]
 ; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80
 ; CHECK-NEXT:    call void @blackhole(<2 x ptr> [[E6]])
@@ -159,8 +159,8 @@ define void @PR37005_3(<2 x ptr> %base, ptr %in) {
 ; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8, ptr [[E1]], i64 48
 ; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds ptr, ptr [[E2]], <2 x i64> <i64 0, i64 1>
 ; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x ptr> [[E4]] to <2 x i64>
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], <i64 14, i64 14>
-; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], <i64 1125899906842496, i64 1125899906842496>
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], splat (i64 14)
+; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], splat (i64 1125899906842496)
 ; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds i8, <2 x ptr> [[BASE:%.*]], <2 x i64> [[SL1]]
 ; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80
 ; CHECK-NEXT:    call void @blackhole(<2 x ptr> [[E6]])
@@ -189,7 +189,7 @@ define void @PR51485(<2 x i64> %v) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], <i64 7, i64 7>
+; CHECK-NEXT:    [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], splat (i64 7)
 ; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds i8, ptr @PR51485, <2 x i64> [[SL1]]
 ; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, <2 x ptr> [[E5]], i64 80
 ; CHECK-NEXT:    call void @blackhole(<2 x ptr> [[E6]])
diff --git a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll
index e8eaf4e24f7e42..c1c11c1acc7bbc 100644
--- a/llvm/test/Transforms/InstCombine/gep-custom-dl.ll
+++ b/llvm/test/Transforms/InstCombine/gep-custom-dl.ll
@@ -99,7 +99,7 @@ define i1 @test5(ptr %x, ptr %y) {
 
 define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i32> %X
@@ -111,7 +111,7 @@ define <2 x i1> @test6(<2 x i32> %X, <2 x ptr> %P) nounwind {
 ; Same as above, but indices scalarized.
 define <2 x i1> @test6b(<2 x i32> %X, <2 x ptr> %P) nounwind {
 ; CHECK-LABEL: @test6b(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = getelementptr inbounds %S, <2 x ptr> %P, i32 0, i32 1, <2 x i32> %X
diff --git a/llvm/test/Transforms/InstCombine/gep-vector.ll b/llvm/test/Transforms/InstCombine/gep-vector.ll
index 4d20323b789674..d8a65b69aceff7 100644
--- a/llvm/test/Transforms/InstCombine/gep-vector.ll
+++ b/llvm/test/Transforms/InstCombine/gep-vector.ll
@@ -5,7 +5,7 @@
 
 define <2 x ptr> @vectorindex1() {
 ; CHECK-LABEL: @vectorindex1(
-; CHECK-NEXT:    ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> <i64 0, i64 1>, <2 x i64> <i64 8192, i64 8192>)
+; CHECK-NEXT:    ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> <i64 0, i64 1>, <2 x i64> splat (i64 8192))
 ;
   %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, <2 x i64> <i64 0, i64 1>, i64 8192
   ret <2 x ptr> %1
@@ -13,7 +13,7 @@ define <2 x ptr> @vectorindex1() {
 
 define <2 x ptr> @vectorindex2() {
 ; CHECK-LABEL: @vectorindex2(
-; CHECK-NEXT:    ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 1>, <2 x i64> <i64 8191, i64 8193>)
+; CHECK-NEXT:    ret <2 x ptr> getelementptr inbounds ([64 x [8192 x i8]], ptr @block, <2 x i64> zeroinitializer, <2 x i64> splat (i64 1), <2 x i64> <i64 8191, i64 8193>)
 ;
   %1 = getelementptr inbounds [64 x [8192 x i8]], ptr @block, i64 0, i64 1, <2 x i64> <i64 8191, i64 8193>
   ret <2 x ptr> %1
diff --git a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll
index 40caa57891369f..81a148b745906e 100644
--- a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll
+++ b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll
@@ -32,8 +32,8 @@ define i16 @t1(i16 %x) {
 ; Vectors
 define <2 x i8> @t2_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @t2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> <i8 7, i8 7>, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = lshr <2 x i8> <i8 -1, i8 -1>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i8> splat (i8 7), [[X:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = lshr <2 x i8> splat (i8 -1), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[MASK]]
 ;
   %bitmask = shl <2 x i8> <i8 1, i8 1>, %x
@@ -43,8 +43,8 @@ define <2 x i8> @t2_vec(<2 x i8> %x) {
 }
 define <3 x i8> @t3_vec_poison0(<3 x i8> %x) {
 ; CHECK-LABEL: @t3_vec_poison0(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[MASK]]
 ;
   %bitmask = shl <3 x i8> <i8 1, i8 poison, i8 1>, %x
@@ -54,8 +54,8 @@ define <3 x i8> @t3_vec_poison0(<3 x i8> %x) {
 }
 define <3 x i8> @t4_vec_poison1(<3 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_poison1(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[MASK]]
 ;
   %bitmask = shl <3 x i8> <i8 1, i8 1, i8 1>, %x
@@ -65,8 +65,8 @@ define <3 x i8> @t4_vec_poison1(<3 x i8> %x) {
 }
 define <3 x i8> @t5_vec_poison2(<3 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_poison2(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <3 x i8> splat (i8 7), [[X:%.*]]
+; CHECK-NEXT:    [[MASK:%.*]] = lshr <3 x i8> splat (i8 -1), [[TMP1]]
 ; CHECK-NEXT:    ret <3 x i8> [[MASK]]
 ;
   %bitmask = shl <3 x i8> <i8 1, i8 1, i8 poison>, %x
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 6a5f77e8cf6e08..8f28049cf5f581 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -222,7 +222,7 @@ define i1 @test13(i64 %X, ptr %P) {
 
 define <2 x i1> @test13_vector(<2 x i64> %X, <2 x ptr> %P) nounwind {
 ; CHECK-LABEL: @test13_vector(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i64> [[X:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = getelementptr inbounds %S, <2 x ptr> %P, <2 x i64> zeroinitializer, <2 x i32> <i32 1, i32 1>, <2 x i64> %X
@@ -250,7 +250,7 @@ define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
 ; CHECK-NEXT:    [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], <i64 4, i64 4>
+; CHECK-NEXT:    [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4)
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -325,7 +325,7 @@ define i1 @test13_as1(i16 %X, ptr addrspace(1) %P) {
 
 define <2 x i1> @test13_vector_as1(<2 x i16> %X, <2 x ptr addrspace(1)> %P) {
 ; CHECK-LABEL: @test13_vector_as1(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], <i16 -1, i16 -1>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i16> [[X:%.*]], splat (i16 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = getelementptr inbounds %S, <2 x ptr addrspace(1)> %P, <2 x i16> <i16 0, i16 0>, <2 x i32> <i32 1, i32 1>, <2 x i16> %X
diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
index 3ebab115f65439..5786c30fc7423a 100644
--- a/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/high-bit-signmask-with-trunc.ll
@@ -48,7 +48,7 @@ define i32 @t3_exact(i64 %x) {
 
 define <2 x i32> @t4(<2 x i64> %x) {
 ; CHECK-LABEL: @t4(
-; CHECK-NEXT:    [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63)
 ; CHECK-NEXT:    [[T1_NEG:%.*]] = trunc nsw <2 x i64> [[T0_NEG]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T1_NEG]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll
index 53a7fb4461c57e..b4bad9e6bd8e1f 100644
--- a/llvm/test/Transforms/InstCombine/high-bit-signmask.ll
+++ b/llvm/test/Transforms/InstCombine/high-bit-signmask.ll
@@ -40,7 +40,7 @@ define i64 @t3_exact(i64 %x) {
 
 define <2 x i64> @t4(<2 x i64> %x) {
 ; CHECK-LABEL: @t4(
-; CHECK-NEXT:    [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[T0_NEG:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 63)
 ; CHECK-NEXT:    ret <2 x i64> [[T0_NEG]]
 ;
   %t0 = lshr <2 x i64> %x, <i64 63, i64 63>
diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll
index ab37becf0e3117..ff03b3d5b678c6 100644
--- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll
+++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation-with-constant.ll
@@ -26,7 +26,7 @@ define i8 @t0(i8 %x) {
 
 define <2 x i8> @t1_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -43, i8 -43>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -43)
 ; CHECK-NEXT:    [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[NEGBIAS]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll
index 6049997db4d1ae..d4aa8b5dbf5055 100644
--- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll
+++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll
@@ -44,7 +44,7 @@ define i8 @t1_commutative(i8 %y) {
 
 define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i8> [[NEGBIAS]]
@@ -57,7 +57,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t3_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i8> [[NEGBIAS]]
diff --git a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll
index 2217666f0f49a2..f4e72c073e8a5c 100644
--- a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll
+++ b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll
@@ -34,7 +34,7 @@ define i8 @t1(i8 %x, i8 %y) {
 define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t2_vec(
 ; CHECK-NEXT:    [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i8> [[ASHR]]
 ;
   %not_x = xor <2 x i8> %x, <i8 -1, i8 -1>
@@ -45,7 +45,7 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t3_vec_poison(
 ; CHECK-NEXT:    [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i8> [[ASHR]]
 ;
   %not_x = xor <2 x i8> %x, <i8 -1, i8 poison>
diff --git a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll
index f92b10b0ccb371..39a76ae11ceef3 100644
--- a/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll
+++ b/llvm/test/Transforms/InstCombine/hoist-xor-by-constant-from-xor-by-value.ll
@@ -20,7 +20,7 @@ define i8 @t0_scalar(i8 %x, i8 %y) {
 define <2 x i8> @t1_splatvec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_splatvec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[TMP1]], <i8 42, i8 42>
+; CHECK-NEXT:    [[R:%.*]] = xor <2 x i8> [[TMP1]], splat (i8 42)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %i0 = xor <2 x i8> %x, <i8 42, i8 42>
diff --git a/llvm/test/Transforms/InstCombine/icmp-add.ll b/llvm/test/Transforms/InstCombine/icmp-add.ll
index 0c141d4b8e73aa..276a178797b654 100644
--- a/llvm/test/Transforms/InstCombine/icmp-add.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-add.ll
@@ -1740,7 +1740,7 @@ define i1 @test1(i32 %a) {
 
 define <2 x i1> @test1vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test1vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 -5, i32 -5>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add <2 x i32> %a, <i32 4, i32 4>
@@ -1760,7 +1760,7 @@ define i1 @test2(i32 %a) {
 
 define <2 x i1> @test2vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = sub <2 x i32> %a, <i32 4, i32 4>
@@ -1780,7 +1780,7 @@ define i1 @test3(i32 %a) {
 
 define <2 x i1> @test3vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test3vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 2147483643, i32 2147483643>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 2147483643)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add <2 x i32> %a, <i32 4, i32 4>
@@ -1818,7 +1818,7 @@ define { i32, i1 } @test4multiuse(i32 %a) {
 
 define <2 x i1> @test4vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test4vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], <i32 -4, i32 -4>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i32> [[A:%.*]], splat (i32 -4)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add <2 x i32> %a, <i32 2147483652, i32 2147483652>
@@ -1841,7 +1841,7 @@ define i1 @nsw_slt1(i8 %a) {
 
 define <2 x i1> @nsw_slt1_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @nsw_slt1_splat_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add nsw <2 x i8> %a, <i8 100, i8 100>
@@ -1864,7 +1864,7 @@ define i1 @nsw_slt2(i8 %a) {
 
 define <2 x i1> @nsw_slt2_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @nsw_slt2_splat_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add nsw <2 x i8> %a, <i8 -100, i8 -100>
@@ -1913,7 +1913,7 @@ define i1 @nsw_sgt1(i8 %a) {
 
 define <2 x i1> @nsw_sgt1_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @nsw_sgt1_splat_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add nsw <2 x i8> %a, <i8 -100, i8 -100>
@@ -1933,7 +1933,7 @@ define i1 @nsw_sgt2(i8 %a) {
 
 define <2 x i1> @nsw_sgt2_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @nsw_sgt2_splat_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -126, i8 -126>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -126)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = add nsw <2 x i8> %a, <i8 100, i8 100>
@@ -1958,7 +1958,7 @@ define i1 @slt_zero_add_nsw(i32 %a) {
 
 define <2 x i1> @slt_zero_add_nsw_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @slt_zero_add_nsw_splat_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = add nsw <2 x i8> %a, <i8 1, i8 1>
@@ -2117,7 +2117,7 @@ define <2 x i1> @op_ugt_sum_vec_commute2(<2 x i8> %p1, <2 x i8> %p2) {
 ; CHECK-LABEL: @op_ugt_sum_vec_commute2(
 ; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P1:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P2:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -2149,7 +2149,7 @@ define <2 x i1> @sum_ult_op_vec_commute1(<2 x i8> %p1, <2 x i8> %p2) {
 ; CHECK-LABEL: @sum_ult_op_vec_commute1(
 ; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 -42>, [[P1:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i8> <i8 -42, i8 42>, [[P2:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X]], splat (i8 -1)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i8> [[Y]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -2367,7 +2367,7 @@ define <2 x i1> @icmp_eq_add_non_splat(<2 x i32> %a) {
 
 define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_eq_add_undef2(
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], <i32 10, i32 undef>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -2378,7 +2378,7 @@ define <2 x i1> @icmp_eq_add_undef2(<2 x i32> %a) {
 
 define <2 x i1> @icmp_eq_add_non_splat2(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_eq_add_non_splat2(
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[ADD]], <i32 10, i32 11>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -2976,8 +2976,8 @@ define i1 @icmp_dec_nonzero(i16 %x) {
 
 define <2 x i1> @icmp_dec_nonzero_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_dec_nonzero_vec(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i32> [[X:%.*]], <i32 8, i32 8>
-; CHECK-NEXT:    [[I:%.*]] = add nsw <2 x i32> [[O]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 8)
+; CHECK-NEXT:    [[I:%.*]] = add nsw <2 x i32> [[O]], splat (i32 -1)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[I]], <i32 15, i32 17>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -3068,8 +3068,8 @@ define i1 @ult_add_C2_neg_C_pow2(i8 %x) {
 
 define <2 x i1> @ult_add_C2_pow2_C_neg_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @ult_add_C2_pow2_C_neg_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -32, i8 -32>
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -32)
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 -64)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = add <2 x i8> %x, <i8 32, i8 32>
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
index 684ece21b1166e..9befc3f66cf92a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-and-shift.ll
@@ -18,7 +18,7 @@ define i32 @icmp_eq_and_pow2_shl1(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_shl1_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_shl1_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -44,7 +44,7 @@ define i32 @icmp_ne_and_pow2_shl1(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_shl1_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_shl1_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -70,7 +70,7 @@ define i32 @icmp_eq_and_pow2_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_shl_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -96,7 +96,7 @@ define i32 @icmp_ne_and_pow2_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_shl_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -162,7 +162,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl1(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_minus1_shl1_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl1_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -188,7 +188,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl1(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_minus1_shl1_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl1_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -214,7 +214,7 @@ define i32 @icmp_eq_and_pow2_minus1_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_minus1_shl_pow2_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -240,7 +240,7 @@ define i32 @icmp_ne_and_pow2_minus1_shl_pow2(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_minus1_shl_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_minus1_shl_pow2_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP0:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -293,7 +293,7 @@ define i32 @icmp_eq_and1_lshr_pow2(i32 %0) {
 
 define <2 x i32> @icmp_eq_and1_lshr_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and1_lshr_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -319,7 +319,7 @@ define i32 @icmp_ne_and1_lshr_pow2(i32 %0) {
 
 define <2 x i32> @icmp_ne_and1_lshr_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and1_lshr_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -356,7 +356,7 @@ define i32 @icmp_eq_and_pow2_lshr_pow2_case2(i32 %0) {
 
 define <2 x i32> @icmp_eq_and_pow2_lshr_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_eq_and_pow2_lshr_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -393,7 +393,7 @@ define i32 @icmp_ne_and_pow2_lshr_pow2_case2(i32 %0) {
 
 define <2 x i32> @icmp_ne_and_pow2_lshr_pow2_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @icmp_ne_and_pow2_lshr_pow2_vec(
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CONV:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[CONV]]
 ;
@@ -673,7 +673,7 @@ define i1 @test_const_shr_and_2_ne_0_negative(i32 %b) {
 
 define <8 x i1> @test_const_shr_and_1_ne_0_v8i8_splat_negative(<8 x i8> %b) {
 ; CHECK-LABEL: @test_const_shr_and_1_ne_0_v8i8_splat_negative(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, [[B:%.*]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <8 x i8> splat (i8 42), [[B:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = trunc <8 x i8> [[SHR]] to <8 x i1>
 ; CHECK-NEXT:    ret <8 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll
index f667e1aa105d04..8a100d66dbe86d 100644
--- a/llvm/test/Transforms/InstCombine/icmp-div-constant.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-div-constant.ll
@@ -16,8 +16,8 @@ define i1 @is_rem2_neg_i8(i8 %x) {
 
 define <2 x i1> @is_rem2_pos_v2i8(<2 x i8> %x) {
 ; CHECK-LABEL: @is_rem2_pos_v2i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -127)
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %s = srem <2 x i8> %x, <i8 2, i8 2>
@@ -434,7 +434,7 @@ define i1 @lshr_x_by_const_cmp_x(i32 %x) {
 
 define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) {
 ; CHECK-LABEL: @lshr_by_const_cmp_sle_value(
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %v = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
@@ -444,7 +444,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value(<4 x i32> %x) {
 
 define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) {
 ; CHECK-LABEL: @lshr_by_const_cmp_sle_value_non_splat(
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %v = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 5>
@@ -455,7 +455,7 @@ define <4 x i1> @lshr_by_const_cmp_sle_value_non_splat(<4 x i32> %x) {
 
 define <4 x i1> @ashr_by_const_cmp_sge_value_non_splat(<4 x i32> %x) {
 ; CHECK-LABEL: @ashr_by_const_cmp_sge_value_non_splat(
-; CHECK-NEXT:    [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %v = ashr <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/icmp-logical.ll b/llvm/test/Transforms/InstCombine/icmp-logical.ll
index 4690ead483a5ba..50feb51092fd9e 100644
--- a/llvm/test/Transforms/InstCombine/icmp-logical.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-logical.ll
@@ -17,7 +17,7 @@ define i1 @masked_and_notallzeroes(i32 %A) {
 
 define <2 x i1> @masked_and_notallzeroes_splat(<2 x i32> %A) {
 ; CHECK-LABEL: @masked_and_notallzeroes_splat(
-; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[TST1:%.*]] = icmp ne <2 x i32> [[MASK1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[TST1]]
 ;
@@ -371,7 +371,7 @@ define i1 @fold_mask_cmps_to_true_logical(i32 %x) {
 
 define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) {
 ; CHECK-LABEL: @nomask_splat_and_B_allones(
-; CHECK-NEXT:    [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 -268435457, i32 -268435457>
+; CHECK-NEXT:    [[RES:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -268435457)
 ; CHECK-NEXT:    ret <2 x i1> [[RES]]
 ;
   %tst1 = icmp slt <2 x i32> %A, <i32 0, i32 poison>
@@ -383,8 +383,8 @@ define <2 x i1> @nomask_splat_and_B_allones(<2 x i32> %A) {
 
 define <2 x i1> @nomask_splat_and_B_mixed(<2 x i32> %A) {
 ; CHECK-LABEL: @nomask_splat_and_B_mixed(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -268435456, i32 -268435456>
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 1879048192, i32 1879048192>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -268435456)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1879048192)
 ; CHECK-NEXT:    ret <2 x i1> [[RES]]
 ;
   %tst1 = icmp sgt <2 x i32> %A, <i32 -1, i32 poison>
@@ -476,8 +476,8 @@ define i1 @masked_icmps_mask_notallzeros_bmask_mixed_1(i32 %x) {
 
 define <2 x i1> @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(<2 x i32> %x) {
 ; CHECK-LABEL: @masked_icmps_mask_notallzeros_bmask_mixed_1_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 15, i32 15>
-; CHECK-NEXT:    [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 9, i32 9>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15)
+; CHECK-NEXT:    [[T5:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 9)
 ; CHECK-NEXT:    ret <2 x i1> [[T5]]
 ;
   %t1 = and <2 x i32> %x, <i32 12, i32 12>
@@ -1778,8 +1778,8 @@ define i1 @masked_icmps_bmask_notmixed_or(i32 %A) {
 
 define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) {
 ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[A:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 3, i8 3>
+; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[RES]]
 ;
   %mask1 = and <2 x i8> %A, <i8 15, i8 15> ; 0x0f
@@ -1792,9 +1792,9 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec(<2 x i8> %A) {
 
 define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) {
 ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison1(
-; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], <i8 15, i8 15>
+; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15)
 ; CHECK-NEXT:    [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], <i8 3, i8 poison>
-; CHECK-NEXT:    [[TST2:%.*]] = icmp eq <2 x i8> [[A]], <i8 -13, i8 -13>
+; CHECK-NEXT:    [[TST2:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 -13)
 ; CHECK-NEXT:    [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]]
 ; CHECK-NEXT:    ret <2 x i1> [[RES]]
 ;
@@ -1808,8 +1808,8 @@ define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison1(<2 x i8> %A) {
 
 define <2 x i1> @masked_icmps_bmask_notmixed_or_vec_poison2(<2 x i8> %A) {
 ; CHECK-LABEL: @masked_icmps_bmask_notmixed_or_vec_poison2(
-; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], <i8 3, i8 3>
+; CHECK-NEXT:    [[MASK1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[TST1:%.*]] = icmp eq <2 x i8> [[MASK1]], splat (i8 3)
 ; CHECK-NEXT:    [[TST2:%.*]] = icmp eq <2 x i8> [[A]], <i8 -13, i8 poison>
 ; CHECK-NEXT:    [[RES:%.*]] = or <2 x i1> [[TST1]], [[TST2]]
 ; CHECK-NEXT:    ret <2 x i1> [[RES]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
index 7e7f087ca7112c..7fa75184c1cf3b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
@@ -158,7 +158,7 @@ define i32 @pr40493_neg3(i32 %area) {
 
 define <4 x i1> @pr40493_vec1(<4 x i32> %area) {
 ; CHECK-LABEL: @pr40493_vec1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[AREA:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -171,7 +171,7 @@ define <4 x i1> @pr40493_vec1(<4 x i32> %area) {
 define <4 x i1> @pr40493_vec2(<4 x i32> %area) {
 ; CHECK-LABEL: @pr40493_vec2(
 ; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], <i32 12, i32 12, i32 12, i32 undef>
-; CHECK-NEXT:    [[REM:%.*]] = and <4 x i32> [[MUL]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[REM:%.*]] = and <4 x i32> [[MUL]], splat (i32 4)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -183,7 +183,7 @@ define <4 x i1> @pr40493_vec2(<4 x i32> %area) {
 
 define <4 x i1> @pr40493_vec3(<4 x i32> %area) {
 ; CHECK-LABEL: @pr40493_vec3(
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], <i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[AREA:%.*]], splat (i32 12)
 ; CHECK-NEXT:    [[REM:%.*]] = and <4 x i32> [[MUL]], <i32 4, i32 4, i32 4, i32 undef>
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[REM]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-mul.ll b/llvm/test/Transforms/InstCombine/icmp-mul.ll
index a14f342ae2482b..962a5e79db4933 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul.ll
@@ -257,7 +257,7 @@ define i1 @eq_nsw_rem_zero(i8 %x) {
 
 define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @ne_nsw_rem_zero(
-; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 -6, i8 -6>
+; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i8> [[X:%.*]], splat (i8 -6)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = mul nsw <2 x i8> %x, <i8 5, i8 5>
@@ -270,7 +270,7 @@ define <2 x i1> @ne_nsw_rem_zero(<2 x i8> %x) {
 define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) {
 ; CHECK-LABEL: @ne_nsw_rem_zero_undef1(
 ; CHECK-NEXT:    [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], <i8 5, i8 undef>
-; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i8> [[A]], <i8 -30, i8 -30>
+; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i8> [[A]], splat (i8 -30)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = mul nsw <2 x i8> %x, <i8 5, i8 undef>
@@ -282,7 +282,7 @@ define <2 x i1> @ne_nsw_rem_zero_undef1(<2 x i8> %x) {
 
 define <2 x i1> @ne_nsw_rem_zero_undef2(<2 x i8> %x) {
 ; CHECK-LABEL: @ne_nsw_rem_zero_undef2(
-; CHECK-NEXT:    [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[A:%.*]] = mul nsw <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i8> [[A]], <i8 -30, i8 undef>
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
@@ -328,7 +328,7 @@ define i1 @ne_nsw_rem_nz(i8 %x) {
 
 define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @eq_nuw_rem_zero(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = mul nuw <2 x i8> %x, <i8 5, i8 5>
@@ -341,7 +341,7 @@ define <2 x i1> @eq_nuw_rem_zero(<2 x i8> %x) {
 define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) {
 ; CHECK-LABEL: @eq_nuw_rem_zero_undef1(
 ; CHECK-NEXT:    [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], <i8 undef, i8 5>
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i8> [[A]], <i8 20, i8 20>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i8> [[A]], splat (i8 20)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = mul nuw <2 x i8> %x, <i8 undef, i8 5>
@@ -353,7 +353,7 @@ define <2 x i1> @eq_nuw_rem_zero_undef1(<2 x i8> %x) {
 
 define <2 x i1> @eq_nuw_rem_zero_undef2(<2 x i8> %x) {
 ; CHECK-LABEL: @eq_nuw_rem_zero_undef2(
-; CHECK-NEXT:    [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[A:%.*]] = mul nuw <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i8> [[A]], <i8 undef, i8 20>
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
@@ -783,7 +783,7 @@ define i1 @eq_mul_constants_with_tz(i32 %x, i32 %y) {
 define <2 x i1> @eq_mul_constants_with_tz_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @eq_mul_constants_with_tz_splat(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1073741823)
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -1042,7 +1042,7 @@ define i1 @mul_evenC_ne(i8 %v) {
 
 define <2 x i1> @mul_oddC_ne_vec(<2 x i8> %v) {
 ; CHECK-LABEL: @mul_oddC_ne_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[V:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %mul = mul <2 x i8> %v, <i8 3, i8 3>
@@ -1110,7 +1110,7 @@ define i1 @mul_xy_z_assumeodd_eq(i8 %x, i8 %y, i8 %z) {
 
 define <2 x i1> @reused_mul_nsw_xy_z_setnonzero_vec_ne(<2 x i8> %x, <2 x i8> %y, <2 x i8> %zi) {
 ; CHECK-LABEL: @reused_mul_nsw_xy_z_setnonzero_vec_ne(
-; CHECK-NEXT:    [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[Z:%.*]] = or <2 x i8> [[ZI:%.*]], splat (i8 4)
 ; CHECK-NEXT:    [[MULY:%.*]] = mul nsw <2 x i8> [[Y:%.*]], [[Z]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[Y]], [[X:%.*]]
 ; CHECK-NEXT:    call void @usev2xi8(<2 x i8> [[MULY]])
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
index 09c9c1ebc83159..0bcbc3bcb050b0 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-and-x.ll
@@ -149,7 +149,7 @@ define <2 x i1> @icmp_sgt_x_negy_fail_partial(<2 x i8> %x, <2 x i8> %yy) {
 
 define <2 x i1> @icmp_sle_x_posy(<2 x i8> %x, <2 x i8> %yy) {
 ; CHECK-LABEL: @icmp_sle_x_posy(
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = and <2 x i8> %yy, <i8 127, i8 127>
@@ -187,7 +187,7 @@ define i1 @icmp_sgt_x_posy(i8 %x, i8 %y) {
 
 define <2 x i1> @icmp_sgt_negx_y(<2 x i8> %xx, <2 x i8> %y) {
 ; CHECK-LABEL: @icmp_sgt_negx_y(
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %x = or <2 x i8> %xx, <i8 128, i8 128>
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
index 93eeab4732185b..993325f6ff0b09 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-or-x.ll
@@ -31,7 +31,7 @@ define <2 x i1> @or_ule(<2 x i8> %x, <2 x i8> %y) {
 define <2 x i1> @or_slt_pos(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) {
 ; CHECK-LABEL: @or_slt_pos(
 ; CHECK-NEXT:    [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[Y:%.*]] = and <2 x i8> [[YY:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[Y:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[XN1:%.*]] = or <2 x i8> [[X]], [[Y]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -192,7 +192,7 @@ define i1 @or_slt_intmin(i8 %x) {
 define <2 x i1> @or_slt_intmin_2(<2 x i8> %xx, <2 x i8> %z) {
 ; CHECK-LABEL: @or_slt_intmin_2(
 ; CHECK-NEXT:    [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[XN1:%.*]] = or <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = icmp slt <2 x i8> [[X]], [[XN1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -268,7 +268,7 @@ pos:
 define <2 x i1> @or_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %z) {
 ; CHECK-LABEL: @or_sgt_intmin_2(
 ; CHECK-NEXT:    [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[XN1:%.*]] = or <2 x i8> [[X]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[XN1:%.*]] = or <2 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XN1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
index a4e7acbca930dc..0635bb31c44fcf 100644
--- a/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-of-xor-x.ll
@@ -384,8 +384,8 @@ define i1 @xor_ult(i8 %x) {
 
 define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @xor_sgt(
-; CHECK-NEXT:    [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], <i8 31, i8 31>
-; CHECK-NEXT:    [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], <i8 64, i8 64>
+; CHECK-NEXT:    [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 31)
+; CHECK-NEXT:    [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 64)
 ; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -399,8 +399,8 @@ define <2 x i1> @xor_sgt(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i1> @xor_sgt_fail_no_known_msb(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @xor_sgt_fail_no_known_msb(
-; CHECK-NEXT:    [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], <i8 55, i8 55>
-; CHECK-NEXT:    [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], <i8 8, i8 8>
+; CHECK-NEXT:    [[YZ:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 55)
+; CHECK-NEXT:    [[Y1:%.*]] = or disjoint <2 x i8> [[YZ]], splat (i8 8)
 ; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i8> [[X:%.*]], [[Y1]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i8> [[XOR]], [[X]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -426,7 +426,7 @@ define i1 @xor_slt_2(i8 %x, i8 %y, i8 %z) {
 define <2 x i1> @xor_sgt_intmin_2(<2 x i8> %xx, <2 x i8> %yy, <2 x i8> %z) {
 ; CHECK-LABEL: @xor_sgt_intmin_2(
 ; CHECK-NEXT:    [[X:%.*]] = add <2 x i8> [[XX:%.*]], [[Z:%.*]]
-; CHECK-NEXT:    [[Y:%.*]] = or <2 x i8> [[YY:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[Y:%.*]] = or <2 x i8> [[YY:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i8> [[X]], [[Y]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i8> [[X]], [[XOR]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll
index 36b3216196f846..f5011d52de64f6 100644
--- a/llvm/test/Transforms/InstCombine/icmp-or.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-or.ll
@@ -16,8 +16,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) {
 
 define <2 x i1> @set_low_bit_mask_ne(<2 x i8> %x) {
 ; CHECK-LABEL: @set_low_bit_mask_ne(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -4, i8 -4>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 16, i8 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -4)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 16)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = or <2 x i8> %x, <i8 3, i8 3>
@@ -220,7 +220,7 @@ define i1 @eq_const_mask_use2(i8 %x, i8 %y) {
 
 define <2 x i1> @decrement_slt_0(<2 x i8> %x) {
 ; CHECK-LABEL: @decrement_slt_0(
-; CHECK-NEXT:    [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %dec = add <2 x i8> %x, <i8 -1, i8 -1>
diff --git a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll
index a151fd96ef4981..edbaffa6a12b9b 100644
--- a/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-power2-and-icmp-shifted-mask.ll
@@ -373,7 +373,7 @@ define i1 @icmp_power2_and_icmp_shifted_mask_swapped_8_12_mask_overlap_fail(i32
 ; Vector of 1 reduction
 define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647(<1 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647(
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], <i32 2147483647>
+; CHECK-NEXT:    [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <1 x i1> [[T4]]
 ;
   %t1 = icmp ult <1 x i32> %x, <i32 2147483648>
@@ -385,7 +385,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147483647(
 
 define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647(<1 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147483647(
-; CHECK-NEXT:    [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], <i32 2147483647>
+; CHECK-NEXT:    [[T4:%.*]] = icmp ult <1 x i32> [[X:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <1 x i1> [[T4]]
 ;
   %t1 = icmp ult <1 x i32> %x, <i32 2147483648>
@@ -529,9 +529,9 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_64_others(<6 x
 ; Vector of 0 of 1 compatible, no change
 define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_gap_in_mask_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], <i32 -1>
-; CHECK-NEXT:    [[T2:%.*]] = and <1 x i32> [[X]], <i32 2147482647>
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <1 x i32> [[T2]], <i32 2147482647>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647)
+; CHECK-NEXT:    [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647)
 ; CHECK-NEXT:    [[T4:%.*]] = and <1 x i1> [[T1]], [[T3]]
 ; CHECK-NEXT:    ret <1 x i1> [[T4]]
 ;
@@ -544,9 +544,9 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_2147482647_
 
 define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail(<1 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_2147482647_gap_in_mask_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], <i32 -1>
-; CHECK-NEXT:    [[T2:%.*]] = and <1 x i32> [[X]], <i32 2147482647>
-; CHECK-NEXT:    [[T3:%.*]] = icmp ne <1 x i32> [[T2]], <i32 2147482647>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <1 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[T2:%.*]] = and <1 x i32> [[X]], splat (i32 2147482647)
+; CHECK-NEXT:    [[T3:%.*]] = icmp ne <1 x i32> [[T2]], splat (i32 2147482647)
 ; CHECK-NEXT:    [[T4:%.*]] = and <1 x i1> [[T3]], [[T1]]
 ; CHECK-NEXT:    ret <1 x i1> [[T4]]
 ;
@@ -560,7 +560,7 @@ define <1 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_214
 ; Vector 1 of 2 compatible, no change
 define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_gap_between_masks_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[X]], <i32 1610612736, i32 1073741823>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], <i32 1610612736, i32 1073741823>
 ; CHECK-NEXT:    [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]]
@@ -575,7 +575,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_2147483648_1073741823_
 
 define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_1073741823_gap_between_masks_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <2 x i32> [[X]], <i32 1610612736, i32 1073741823>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[T2]], <i32 1610612736, i32 1073741823>
 ; CHECK-NEXT:    [[T4:%.*]] = and <2 x i1> [[T3]], [[T1]]
@@ -591,7 +591,7 @@ define <2 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_2147483648_107
 ; Vector 1 of 7 compatible, no change
 define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i8> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <7 x i8> [[X]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32, i8 64>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <7 x i8> [[T2]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32, i8 64>
 ; CHECK-NEXT:    [[T4:%.*]] = and <7 x i1> [[T1]], [[T3]]
@@ -606,7 +606,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_1_of_7_fail(<7 x i
 
 define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail(<7 x i8> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <7 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <7 x i8> [[X]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32, i8 64>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <7 x i8> [[T2]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32, i8 64>
 ; CHECK-NEXT:    [[T4:%.*]] = and <7 x i1> [[T3]], [[T1]]
@@ -622,7 +622,7 @@ define <7 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_1_of_7_fai
 ; Vector 0 of 6 compatible, no change
 define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i8> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <6 x i8> [[X]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <6 x i8> [[T2]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32>
 ; CHECK-NEXT:    [[T4:%.*]] = and <6 x i1> [[T1]], [[T3]]
@@ -637,7 +637,7 @@ define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_128_0_of_6_fail(<6 x i
 
 define <6 x i1> @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail(<6 x i8> %x) {
 ; CHECK-LABEL: @icmp_power2_and_icmp_shifted_mask_vector_swapped_128_0_of_6_fail(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <6 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = and <6 x i8> [[X]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <6 x i8> [[T2]], <i8 125, i8 122, i8 116, i8 104, i8 80, i8 32>
 ; CHECK-NEXT:    [[T4:%.*]] = and <6 x i1> [[T3]], [[T1]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-range.ll b/llvm/test/Transforms/InstCombine/icmp-range.ll
index 8b690826a7bf9a..d2a4e3e7a8376a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-range.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-range.ll
@@ -545,7 +545,7 @@ define i1 @sub_ule_zext(i1 %b, i8 %x, i8 %y) {
 
 define <2 x i1> @sub_ult_and(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_ult_and(
-; CHECK-NEXT:    [[A:%.*]] = and <2 x i8> [[B:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[A:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[S]], [[A]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -775,7 +775,7 @@ define i1 @sub_ult_sext(i1 %b, i8 %x, i8 %y) {
 
 define <2 x i1> @sub_ule_ashr(<2 x i8> %b, <2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_ule_ashr(
-; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[B:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[B:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[S:%.*]] = sub <2 x i8> [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp ule <2 x i8> [[S]], [[A]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
@@ -1345,7 +1345,7 @@ define <2 x i1> @icmp_ne_zext_eq_zero_vec(<2 x i32> %a) {
 
 define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_ne_zext_ne_zero_vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %cmp = icmp ne <2 x i32> %a, <i32 0, i32 0>
@@ -1356,7 +1356,7 @@ define <2 x i1> @icmp_ne_zext_ne_zero_vec(<2 x i32> %a) {
 
 define <2 x i1> @icmp_ne_zext_eq_one_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_ne_zext_eq_one_vec(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %cmp = icmp eq <2 x i32> %a, <i32 1, i32 1>
@@ -1530,8 +1530,8 @@ define <2 x i1> @icmp_ne_sext_eq_zero_vec(<2 x i32> %a) {
 
 define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_ne_sext_ne_zero_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %cmp = icmp ne <2 x i32> %a, <i32 0, i32 0>
@@ -1542,8 +1542,8 @@ define <2 x i1> @icmp_ne_sext_ne_zero_vec(<2 x i32> %a) {
 
 define <2 x i1> @icmp_ne_sext_eq_allones_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_ne_sext_eq_allones_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -2, i32 -2>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP1]]
 ;
   %cmp = icmp eq <2 x i32> %a, <i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll
index 0803c03efdbf49..f8d672b03cbdda 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-1-overflow.ll
@@ -16,7 +16,7 @@ define i1 @icmp_shl_ugt_1(i8 %x) {
 
 define <2 x i1> @icmp_shl_ugt_2(<2 x i32> %_x) {
 ; CHECK-LABEL: @icmp_shl_ugt_2(
-; CHECK-NEXT:    [[X:%.*]] = add <2 x i32> [[_X:%.*]], <i32 42, i32 42>
+; CHECK-NEXT:    [[X:%.*]] = add <2 x i32> [[_X:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -78,7 +78,7 @@ define <4 x i1> @icmp_shl_ult_2(<4 x i4> %_x) {
 
 define <2 x i1> @icmp_shl_ule_1(<2 x i8> %x) {
 ; CHECK-LABEL: @icmp_shl_ule_1(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = shl <2 x i8> %x, <i8 1, i8 poison>
@@ -110,7 +110,7 @@ define i1 @icmp_shl_eq_1(i8 %x) {
 
 define <2 x i1> @icmp_shl_eq_2(<2 x i8> %_x) {
 ; CHECK-LABEL: @icmp_shl_eq_2(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 42>, [[_X:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[_X:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
index 6b9ea1f8ef97e2..06175f1635a90d 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nsw.ll
@@ -194,7 +194,7 @@ define i1 @icmp_sgt11(i8 %x) {
 
 define <2 x i1> @icmp_sgt11_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @icmp_sgt11_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl nsw <2 x i8> %x, <i8 7, i8 7>
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
index 9f50265004f01b..abd2021fae116f 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl-nuw.ll
@@ -43,7 +43,7 @@ define <2 x i1> @icmp_ule_16x2(<2 x i64>) {
 
 define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) {
 ; CHECK-LABEL: @icmp_ule_16x2_nonzero(
-; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
+; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %c = shl nuw <2 x i64> %0, <i64 16, i64 16>
@@ -53,7 +53,7 @@ define <2 x i1> @icmp_ule_16x2_nonzero(<2 x i64>) {
 
 define <2 x i1> @icmp_ule_12x2(<2 x i64>) {
 ; CHECK-LABEL: @icmp_ule_12x2(
-; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
+; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i64> [[TMP0:%.*]], splat (i64 4)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %c = shl nuw <2 x i64> %0, <i64 12, i64 12>
@@ -73,7 +73,7 @@ define i1 @icmp_ult_8(i64) {
 
 define <2 x i1> @icmp_uge_8x2(<2 x i16>) {
 ; CHECK-LABEL: @icmp_uge_8x2(
-; CHECK-NEXT:    [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], <i16 15, i16 15>
+; CHECK-NEXT:    [[D:%.*]] = icmp ugt <2 x i16> [[TMP0:%.*]], splat (i16 15)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %c = shl nuw <2 x i16> %0, <i16 8, i16 8>
@@ -83,7 +83,7 @@ define <2 x i1> @icmp_uge_8x2(<2 x i16>) {
 
 define <2 x i1> @icmp_ugt_16x2(<2 x i32>) {
 ; CHECK-LABEL: @icmp_ugt_16x2(
-; CHECK-NEXT:    [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], <i32 15, i32 15>
+; CHECK-NEXT:    [[D:%.*]] = icmp ugt <2 x i32> [[TMP0:%.*]], splat (i32 15)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %c = shl nuw <2 x i32> %0, <i32 16, i32 16>
diff --git a/llvm/test/Transforms/InstCombine/icmp-shl.ll b/llvm/test/Transforms/InstCombine/icmp-shl.ll
index 5295bd01aa0899..2b8ec33bcf34dc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shl.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shl.ll
@@ -67,7 +67,7 @@ define <2 x i1> @shl_vec_nsw_slt_1_0_todo_non_splat(<2 x i8> %x, <2 x i8> %C) {
 define <2 x i1> @shl_nsw_sle_n1(<2 x i8> %x, <2 x i8> %C) {
 ; CHECK-LABEL: @shl_nsw_sle_n1(
 ; CHECK-NEXT:    [[Y:%.*]] = shl nsw <2 x i8> [[X:%.*]], [[C:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[Y]], <i8 2, i8 2>
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = shl nsw <2 x i8> %x, %C
@@ -150,7 +150,7 @@ define i1 @shl_nuw_ugt_Csle0_fail_missing_flag(i8 %x, i8 %C) {
 
 define <2 x i1> @shl_nsw_nuw_sgt_Csle0(<2 x i8> %x, <2 x i8> %C) {
 ; CHECK-LABEL: @shl_nsw_nuw_sgt_Csle0(
-; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -10, i8 -10>
+; CHECK-NEXT:    [[Z:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -10)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = shl nsw nuw <2 x i8> %x, %C
@@ -171,7 +171,7 @@ define <2 x i1> @shl_nsw_nuw_sge_Csle0_todo_non_splat(<2 x i8> %x, <2 x i8> %C)
 
 define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) {
 ; CHECK-LABEL: @shl_nsw_nuw_sle_Csle0(
-; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 -5, i8 -5>
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = shl nsw nuw <2 x i8> %x, %C
@@ -182,7 +182,7 @@ define <2 x i1> @shl_nsw_nuw_sle_Csle0(<2 x i8> %x, <2 x i8> %C) {
 define <2 x i1> @shl_nsw_nuw_slt_Csle0_fail_positive(<2 x i8> %x, <2 x i8> %C) {
 ; CHECK-LABEL: @shl_nsw_nuw_slt_Csle0_fail_positive(
 ; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], [[C:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[Y]], <i8 6, i8 6>
+; CHECK-NEXT:    [[Z:%.*]] = icmp slt <2 x i8> [[Y]], splat (i8 6)
 ; CHECK-NEXT:    ret <2 x i1> [[Z]]
 ;
   %y = shl nsw nuw <2 x i8> %x, %C
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
index 5f09964fd93ad5..70f25391847614 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -2454,7 +2454,7 @@ define i1 @ashr_00_00_ashr_extra_use(i8 %x, ptr %ptr) {
 
 define <4 x i1> @ashr_00_00_vec(<4 x i8> %x) {
 ; CHECK-LABEL: @ashr_00_00_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], <i8 88, i8 88, i8 88, i8 88>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 88)
 ; CHECK-NEXT:    ret <4 x i1> [[C]]
 ;
   %s = ashr exact <4 x i8> %x, <i8 3,i8 3, i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 71b4f5a970c2f6..1d0cdd1c9ae7f9 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -17,7 +17,7 @@ define i1 @lshr_eq_msb_low_last_zero(i8 %a) {
 
 define <2 x i1> @lshr_eq_msb_low_last_zero_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @lshr_eq_msb_low_last_zero_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[A:%.*]], splat (i8 6)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shr = lshr <2 x i8> <i8 127, i8 127>, %a
@@ -1358,7 +1358,7 @@ define i1 @exactly_one_set_signbit_use1(i8 %x, i8 %y) {
 define <2 x i1> @same_signbit(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @same_signbit(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R1]]
 ;
   %xsign = lshr <2 x i8> %x, <i8 7, i8 7>
@@ -1519,7 +1519,7 @@ define i1 @exactly_one_set_signbit_use1_signed(i8 %x, i8 %y) {
 define <2 x i1> @same_signbit_signed(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @same_signbit_signed(
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[R1:%.*]] = icmp sgt <2 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R1]]
 ;
   %xsign = ashr <2 x i8> %x, <i8 7, i8 7>
diff --git a/llvm/test/Transforms/InstCombine/icmp-signmask.ll b/llvm/test/Transforms/InstCombine/icmp-signmask.ll
index bea8da2074ab0b..92df87dae3eddd 100644
--- a/llvm/test/Transforms/InstCombine/icmp-signmask.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-signmask.ll
@@ -26,7 +26,7 @@ define i1 @cmp_x_and_negp2_with_eq_fail_not_signmask(i8 %x) {
 define <2 x i1> @cmp_x_and_negp2_with_ne(<2 x i8> %x) {
 ; CHECK-LABEL: @cmp_x_and_negp2_with_ne(
 ; CHECK-NEXT:    [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], <i8 -8, i8 -16>
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %andx = and <2 x i8> %x, <i8 -8, i8 -16>
@@ -37,7 +37,7 @@ define <2 x i1> @cmp_x_and_negp2_with_ne(<2 x i8> %x) {
 define <2 x i1> @cmp_x_and_negp2_with_ne_or_z(<2 x i8> %x) {
 ; CHECK-LABEL: @cmp_x_and_negp2_with_ne_or_z(
 ; CHECK-NEXT:    [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], <i8 0, i8 -16>
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %andx = and <2 x i8> %x, <i8 0, i8 -16>
@@ -48,7 +48,7 @@ define <2 x i1> @cmp_x_and_negp2_with_ne_or_z(<2 x i8> %x) {
 define <2 x i1> @cmp_x_and_negp2_with_ne_fail_not_p2(<2 x i8> %x) {
 ; CHECK-LABEL: @cmp_x_and_negp2_with_ne_fail_not_p2(
 ; CHECK-NEXT:    [[ANDX:%.*]] = and <2 x i8> [[X:%.*]], <i8 -8, i8 -15>
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[ANDX]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %andx = and <2 x i8> %x, <i8 -8, i8 -15>
diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll
index 8cb3c1c181cec7..8ca9c8ab144817 100644
--- a/llvm/test/Transforms/InstCombine/icmp-sub.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll
@@ -164,7 +164,7 @@ define <2 x i1> @icmp_eq_sub_non_splat(<2 x i32> %a) {
 
 define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_eq_sub_undef2(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> <i32 15, i32 15>, [[A:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], <i32 10, i32 undef>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -175,7 +175,7 @@ define <2 x i1> @icmp_eq_sub_undef2(<2 x i32> %a) {
 
 define <2 x i1> @icmp_eq_sub_non_splat2(<2 x i32> %a) {
 ; CHECK-LABEL: @icmp_eq_sub_non_splat2(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> <i32 15, i32 15>, [[A:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> splat (i32 15), [[A:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[SUB]], <i32 10, i32 11>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -606,7 +606,7 @@ entry:
 define <2 x i1> @PR60818_ne_vector(<2 x i32> %a) {
 ; CHECK-LABEL: @PR60818_ne_vector(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
index 4de64d85bbf25a..b85deabf5fa060 100644
--- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
@@ -17,7 +17,7 @@ define i1 @ult_2(i32 %x) {
 
 define <2 x i1> @ult_16_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ult_16_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2032, i16 2032>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2032)
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -83,8 +83,8 @@ define i1 @ult_192(i32 %x) {
 
 define <2 x i1> @ult_2044_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ult_2044_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2044, i16 2044>
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], <i16 2044, i16 2044>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044)
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], splat (i16 2044)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i16> %x to <2 x i11>
@@ -133,7 +133,7 @@ define i1 @ugt_3(i32 %x) {
 
 define <2 x i1> @ugt_7_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ugt_7_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2040, i16 2040>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2040)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -183,8 +183,8 @@ define i1 @ugt_253(i32 %x) {
 
 define <2 x i1> @ugt_2043_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ugt_2043_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2044, i16 2044>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], <i16 2044, i16 2044>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 2044)
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], splat (i16 2044)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i16> %x to <2 x i11>
@@ -233,7 +233,7 @@ define i1 @slt_0(i32 %x) {
 
 define <2 x i1> @slt_0_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @slt_0_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 1024, i16 1024>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -279,7 +279,7 @@ define i1 @sgt_n1(i32 %x) {
 
 define <2 x i1> @sgt_n1_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @sgt_n1_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 1024, i16 1024>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 1024)
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -331,7 +331,7 @@ define i1 @trunc_eq_i32_i8(i32 %x) {
 define <2 x i1> @trunc_eq_v2i32_v2i8(<2 x i32> %x) {
 ; CHECK-LABEL: @trunc_eq_v2i32_v2i8(
 ; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[T]], <i8 42, i8 42>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[T]], splat (i8 42)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i32> %x to <2 x i8>
@@ -368,7 +368,7 @@ define i1 @shl1_trunc_eq0(i32 %a) {
 
 define <2 x i1> @shl1_trunc_ne0(<2 x i8> %a) {
 ; CHECK-LABEL: @shl1_trunc_ne0(
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[A:%.*]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %shl = shl <2 x i8> <i8 1, i8 poison>, %a
@@ -765,7 +765,7 @@ define i1 @sge_nsw_i48(i48 %x) {
 define <2 x i1> @uge_nsw(<2 x i32> %x) {
 ; CHECK-LABEL: @uge_nsw(
 ; CHECK-NEXT:    [[T:%.*]] = trunc nsw <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[T]], <i8 -46, i8 -46>
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[T]], splat (i8 -46)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc nsw <2 x i32> %x to <2 x i8>
diff --git a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll
index a61feac024a9c3..fc582708e0b750 100644
--- a/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-uadd-sat.ll
@@ -8,8 +8,8 @@
 ; Basic tests with one user
 ; ==============================================================================
 define i1 @icmp_eq_basic(i8 %arg) {
-; CHECK-LABEL: define i1 @icmp_eq_basic
-; CHECK-SAME: (i8 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_eq_basic(
+; CHECK-SAME: i8 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[ARG]], 3
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -19,8 +19,8 @@ define i1 @icmp_eq_basic(i8 %arg) {
 }
 
 define i1 @icmp_ne_basic(i16 %arg) {
-; CHECK-LABEL: define i1 @icmp_ne_basic
-; CHECK-SAME: (i16 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_ne_basic(
+; CHECK-SAME: i16 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i16 [[ARG]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -30,8 +30,8 @@ define i1 @icmp_ne_basic(i16 %arg) {
 }
 
 define i1 @icmp_ule_basic(i32 %arg) {
-; CHECK-LABEL: define i1 @icmp_ule_basic
-; CHECK-SAME: (i32 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_ule_basic(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ARG]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -41,8 +41,8 @@ define i1 @icmp_ule_basic(i32 %arg) {
 }
 
 define i1 @icmp_ult_basic(i64 %arg) {
-; CHECK-LABEL: define i1 @icmp_ult_basic
-; CHECK-SAME: (i64 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_ult_basic(
+; CHECK-SAME: i64 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[ARG]], 15
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -52,8 +52,8 @@ define i1 @icmp_ult_basic(i64 %arg) {
 }
 
 define i1 @icmp_uge_basic(i8 %arg) {
-; CHECK-LABEL: define i1 @icmp_uge_basic
-; CHECK-SAME: (i8 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_uge_basic(
+; CHECK-SAME: i8 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[ARG]], 3
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -63,8 +63,8 @@ define i1 @icmp_uge_basic(i8 %arg) {
 }
 
 define i1 @icmp_ugt_basic(i16 %arg) {
-; CHECK-LABEL: define i1 @icmp_ugt_basic
-; CHECK-SAME: (i16 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_ugt_basic(
+; CHECK-SAME: i16 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[ARG]], 2
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -74,8 +74,8 @@ define i1 @icmp_ugt_basic(i16 %arg) {
 }
 
 define i1 @icmp_sle_basic(i32 %arg) {
-; CHECK-LABEL: define i1 @icmp_sle_basic
-; CHECK-SAME: (i32 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_sle_basic(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[ARG]], 2147483637
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -85,8 +85,8 @@ define i1 @icmp_sle_basic(i32 %arg) {
 }
 
 define i1 @icmp_slt_basic(i64 %arg) {
-; CHECK-LABEL: define i1 @icmp_slt_basic
-; CHECK-SAME: (i64 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_slt_basic(
+; CHECK-SAME: i64 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[ARG]], 9223372036854775783
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
@@ -96,8 +96,8 @@ define i1 @icmp_slt_basic(i64 %arg) {
 }
 
 define i1 @icmp_sge_basic(i8 %arg) {
-; CHECK-LABEL: define i1 @icmp_sge_basic
-; CHECK-SAME: (i8 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_sge_basic(
+; CHECK-SAME: i8 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[ARG]], -3
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[TMP1]], 124
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -108,8 +108,8 @@ define i1 @icmp_sge_basic(i8 %arg) {
 }
 
 define i1 @icmp_sgt_basic(i16 %arg) {
-; CHECK-LABEL: define i1 @icmp_sgt_basic
-; CHECK-SAME: (i16 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_sgt_basic(
+; CHECK-SAME: i16 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[ARG]], -4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[TMP1]], 32762
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -123,8 +123,8 @@ define i1 @icmp_sgt_basic(i16 %arg) {
 ; Tests with more than user
 ; ==============================================================================
 define i1 @icmp_eq_multiuse(i8 %arg) {
-; CHECK-LABEL: define i1 @icmp_eq_multiuse
-; CHECK-SAME: (i8 [[ARG:%.*]]) {
+; CHECK-LABEL: define i1 @icmp_eq_multiuse(
+; CHECK-SAME: i8 [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ADD:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[ARG]], i8 2)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[ADD]], 5
 ; CHECK-NEXT:    call void @use.i8(i8 [[ADD]])
@@ -140,9 +140,9 @@ define i1 @icmp_eq_multiuse(i8 %arg) {
 ; Tests with vector types
 ; ==============================================================================
 define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal
-; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], <i8 3, i8 3>
+; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_equal(
+; CHECK-SAME: <2 x i8> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %arg, <2 x i8> <i8 2, i8 2>)
@@ -151,8 +151,8 @@ define <2 x i1> @icmp_eq_vector_equal(<2 x i8> %arg) {
 }
 
 define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal
-; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
+; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_unequal(
+; CHECK-SAME: <2 x i8> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> <i8 1, i8 2>)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], <i8 5, i8 6>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
@@ -163,9 +163,9 @@ define <2 x i1> @icmp_eq_vector_unequal(<2 x i8> %arg) {
 }
 
 define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal
-; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], <i16 2, i16 2>
+; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_equal(
+; CHECK-SAME: <2 x i16> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %arg, <2 x i16> <i16 3, i16 3>)
@@ -174,8 +174,8 @@ define <2 x i1> @icmp_ne_vector_equal(<2 x i16> %arg) {
 }
 
 define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal
-; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) {
+; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_unequal(
+; CHECK-SAME: <2 x i16> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG]], <2 x i16> <i16 3, i16 33>)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i16> [[ADD]], <i16 7, i16 6>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
@@ -186,9 +186,9 @@ define <2 x i1> @icmp_ne_vector_unequal(<2 x i16> %arg) {
 }
 
 define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal
-; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], <i32 2, i32 2>
+; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_equal(
+; CHECK-SAME: <2 x i32> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %arg, <2 x i32> <i32 3, i32 3>)
@@ -197,8 +197,8 @@ define <2 x i1> @icmp_ule_vector_equal(<2 x i32> %arg) {
 }
 
 define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal
-; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) {
+; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_unequal(
+; CHECK-SAME: <2 x i32> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[ARG]], <2 x i32> <i32 3, i32 35>)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[ADD]], <i32 5, i32 7>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
@@ -209,9 +209,9 @@ define <2 x i1> @icmp_ule_vector_unequal(<2 x i32> %arg) {
 }
 
 define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal
-; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], <i64 9223372036854366185, i64 9223372036854366185>
+; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_equal(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[ARG]], splat (i64 9223372036854366185)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %arg, <2 x i64> <i64 409623, i64 409623>)
@@ -220,8 +220,8 @@ define <2 x i1> @icmp_sgt_vector_equal(<2 x i64> %arg) {
 }
 
 define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal
-; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) {
+; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_unequal(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> [[ARG]], <2 x i64> <i64 320498, i64 409623>)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i64> [[ADD]], <i64 1234, i64 3456>
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
@@ -235,10 +235,10 @@ define <2 x i1> @icmp_sgt_vector_unequal(<2 x i64> %arg) {
 ; Tests with vector types and multiple uses
 ; ==============================================================================
 define <2 x i1> @icmp_eq_vector_multiuse_equal(<2 x i8> %arg) {
-; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal
-; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> <i8 2, i8 2>)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], <i8 5, i8 5>
+; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_equal(
+; CHECK-SAME: <2 x i8> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2))
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ADD]], splat (i8 5)
 ; CHECK-NEXT:    call void @use.v2i8(<2 x i8> [[ADD]])
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
index 32ef6267cdf8b4..347d35548164b2 100644
--- a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
@@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) {
 
 define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
@@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
 ; CHECK-LABEL: @p2_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <3 x i8> [[T0]], <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <3 x i1> [[R]]
 ;
@@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) {
 
 define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) {
 ; CHECK-LABEL: @p2_vec_poison1(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> <i8 1, i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
 ; CHECK-NEXT:    [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
@@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 1, i8 -1>, [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 1>
 ; CHECK-NEXT:    [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
index ba47ed02edbdf4..d3cf4f530d9946 100644
--- a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll
@@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) {
 define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 1>, [[BITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ule <2 x i8> [[VAL:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 1>
 ; CHECK-NEXT:    [[R:%.*]] = icmp uge <2 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
index 25894a22f00751..34fc0002eee168 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
@@ -39,7 +39,7 @@ define i1 @p0(i8 %val, i8 %bits) {
 
 define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @p1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[VAL_HIGHBITS:%.*]] = lshr <2 x i8> [[VAL:%.*]], [[BITS]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[VAL_HIGHBITS]], zeroinitializer
@@ -56,7 +56,7 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) {
 ; CHECK-LABEL: @p2_vec_undef0(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <3 x i8> [[T0]], <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = add <3 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <3 x i1> [[R]]
 ;
@@ -84,7 +84,7 @@ define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) {
 
 define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) {
 ; CHECK-LABEL: @p2_vec_poison1(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> <i8 1, i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <3 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use3i8(<3 x i8> [[T0]])
 ; CHECK-NEXT:    [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer
@@ -216,7 +216,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 1, i8 -1>, [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
-; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -229,7 +229,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw <2 x i8> splat (i8 1), [[BITS:%.*]]
 ; CHECK-NEXT:    call void @use2i8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[T1:%.*]] = add <2 x i8> [[T0]], <i8 -1, i8 1>
 ; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
index 37aa85202e5622..f8f508057bacd8 100644
--- a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll
@@ -198,7 +198,7 @@ define i1 @n1(i8 %val, i8 %bits) {
 define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i8> <i8 -1, i8 1>, [[BITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[VAL:%.*]], [[T1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -210,7 +210,7 @@ define <2 x i1> @n2_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 
 define <2 x i1> @n3_vec_nonsplat(<2 x i8> %val, <2 x i8> %bits) {
 ; CHECK-LABEL: @n3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> <i8 -1, i8 -1>, [[BITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <2 x i8> splat (i8 -1), [[BITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = xor <2 x i8> [[T0]], <i8 -1, i8 1>
 ; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T1]], [[VAL:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll
index 87257e40ac6a2c..2cd07b17af5806 100644
--- a/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-usub-sat.ll
@@ -275,7 +275,7 @@ define i1 @icmp_eq_multiuse_negative(i8 %arg) {
 define <2 x i1> @icmp_eq_vector_positive_equal(<2 x i8> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_positive_equal
 ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], <i8 7, i8 7>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> <i8 2, i8 2>)
@@ -298,7 +298,7 @@ define <2 x i1> @icmp_eq_vector_positive_unequal(<2 x i8> %arg) {
 define <2 x i1> @icmp_ne_vector_positive_equal(<2 x i16> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_ne_vector_positive_equal
 ; CHECK-SAME: (<2 x i16> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], <i16 37, i16 37>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i16> [[ARG]], splat (i16 37)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %arg, <2 x i16> <i16 32, i16 32>)
@@ -321,7 +321,7 @@ define <2 x i1> @icmp_ne_vector_positive_unequal(<2 x i16> %arg) {
 define <2 x i1> @icmp_ule_vector_positive_equal(<2 x i32> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_ule_vector_positive_equal
 ; CHECK-SAME: (<2 x i32> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], <i32 37, i32 37>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[ARG]], splat (i32 37)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %arg, <2 x i32> <i32 32, i32 32>)
@@ -344,8 +344,8 @@ define <2 x i1> @icmp_ule_vector_positive_unequal(<2 x i32> %arg) {
 define <2 x i1> @icmp_sgt_vector_positive_equal(<2 x i64> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_sgt_vector_positive_equal
 ; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[ARG]], <i64 -410858, i64 -410858>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 9223372036854774573, i64 9223372036854774573>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[ARG]], splat (i64 -410858)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 9223372036854774573)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %arg, <2 x i64> <i64 409623, i64 409623>)
@@ -371,7 +371,7 @@ define <2 x i1> @icmp_sgt_vector_positive_unequal(<2 x i64> %arg) {
 define <2 x i1> @icmp_eq_vector_negative_equal(<2 x i8> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_negative_equal
 ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], <i8 -3, i8 -3>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[ARG]], splat (i8 -3)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %arg, <2 x i8> <i8 -5, i8 -5>)
@@ -397,8 +397,8 @@ define <2 x i1> @icmp_eq_vector_negative_unequal(<2 x i8> %arg) {
 define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_positive_equal
 ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> <i8 2, i8 2>)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], <i8 5, i8 5>
+; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 2))
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5)
 ; CHECK-NEXT:    call void @use.v2i8(<2 x i8> [[SUB]])
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -414,8 +414,8 @@ define <2 x i1> @icmp_eq_vector_multiuse_positive_equal(<2 x i8> %arg) {
 define <2 x i1> @icmp_eq_vector_multiuse_negative_equal(<2 x i8> %arg) {
 ; CHECK-LABEL: define <2 x i1> @icmp_eq_vector_multiuse_negative_equal
 ; CHECK-SAME: (<2 x i8> [[ARG:%.*]]) {
-; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> <i8 -20, i8 -20>)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], <i8 5, i8 5>
+; CHECK-NEXT:    [[SUB:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[ARG]], <2 x i8> splat (i8 -20))
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[SUB]], splat (i8 5)
 ; CHECK-NEXT:    call void @use.v2i8(<2 x i8> [[SUB]])
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll
index 677655a0a99ab6..bb1046759aa306 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec-inseltpoison.ll
@@ -44,7 +44,7 @@ define <2 x i1> @ule(<2 x i8> %x) {
 
 define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
 ; CHECK-LABEL: @ult_min_signed_value(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 128, i8 128>
@@ -55,7 +55,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
 
 define <2 x i1> @sge_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @sge_zero(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0>
@@ -72,7 +72,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) {
 
 define <2 x i1> @sle_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @sle_zero(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0>
@@ -291,7 +291,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) {
 
 define <4 x i1> @splat_icmp(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -302,7 +302,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) {
 
 define <4 x i1> @splat_icmp_poison(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -313,7 +313,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) {
 
 define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_larger_size(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -324,7 +324,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) {
 
 define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) {
 ; CHECK-LABEL: @splat_fcmp_smaller_size(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -339,7 +339,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_extra_use(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    call void @use_v4i8(<4 x i8> [[SPLATX]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42)
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -353,7 +353,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) {
 define <4 x i1> @not_splat_icmp(<4 x i8> %x) {
 ; CHECK-LABEL: @not_splat_icmp(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42)
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
index 1e392635936b7e..708941f99da6fc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -46,7 +46,7 @@ define <2 x i1> @ule(<2 x i8> %x) {
 
 define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
 ; CHECK-LABEL: @ult_min_signed_value(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 128, i8 128>
@@ -57,7 +57,7 @@ define <2 x i1> @ult_min_signed_value(<2 x i8> %x) {
 
 define <2 x i1> @sge_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @sge_zero(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0>
@@ -74,7 +74,7 @@ define <2 x i1> @uge_zero(<2 x i8> %x) {
 
 define <2 x i1> @sle_zero(<2 x i8> %x) {
 ; CHECK-LABEL: @sle_zero(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0>
@@ -293,7 +293,7 @@ define <2 x i1> @same_shuffle_inputs_icmp_extra_use3(<4 x i8> %x, <4 x i8> %y) {
 
 define <4 x i1> @splat_icmp(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -304,7 +304,7 @@ define <4 x i1> @splat_icmp(<4 x i8> %x) {
 
 define <4 x i1> @splat_icmp_poison(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -315,7 +315,7 @@ define <4 x i1> @splat_icmp_poison(<4 x i8> %x) {
 
 define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_larger_size(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -326,7 +326,7 @@ define <4 x i1> @splat_icmp_larger_size(<2 x i8> %x) {
 
 define <4 x i1> @splat_fcmp_smaller_size(<5 x float> %x) {
 ; CHECK-LABEL: @splat_fcmp_smaller_size(
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], <float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01, float 4.200000e+01>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp oeq <5 x float> [[X:%.*]], splat (float 4.200000e+01)
 ; CHECK-NEXT:    [[CMP:%.*]] = shufflevector <5 x i1> [[TMP1]], <5 x i1> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -341,7 +341,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_icmp_extra_use(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
 ; CHECK-NEXT:    call void @use_v4i8(<4 x i8> [[SPLATX]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42)
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -355,7 +355,7 @@ define <4 x i1> @splat_icmp_extra_use(<4 x i8> %x) {
 define <4 x i1> @not_splat_icmp(<4 x i8> %x) {
 ; CHECK-LABEL: @not_splat_icmp(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], <i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <4 x i8> [[SPLATX]], splat (i8 42)
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %splatx = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 3, i32 3>
diff --git a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll
index d08dca225328fe..dd07a7d452a777 100644
--- a/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-xor-signbit.ll
@@ -113,7 +113,7 @@ define i1 @sge_to_ugt(i8 %x) {
 
 define <2 x i1> @sge_to_ugt_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @sge_to_ugt_splat(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 -114, i8 -114>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 -114)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %a = xor <2 x i8> %x, <i8 128, i8 128>
@@ -135,7 +135,7 @@ define i1 @uge_to_sgt(i8 %x) {
 
 define <2 x i1> @uge_to_sgt_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @uge_to_sgt_splat(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -114, i8 -114>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -114)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %a = xor <2 x i8> %x, <i8 128, i8 128>
@@ -157,7 +157,7 @@ define i1 @sge_to_ult(i8 %x) {
 
 define <2 x i1> @sge_to_ult_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @sge_to_ult_splat(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 113, i8 113>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 113)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %a = xor <2 x i8> %x, <i8 127, i8 127>
@@ -179,7 +179,7 @@ define i1 @uge_to_slt(i8 %x) {
 
 define <2 x i1> @uge_to_slt_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @uge_to_slt_splat(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 113, i8 113>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 113)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %a = xor <2 x i8> %x, <i8 127, i8 127>
@@ -268,7 +268,7 @@ define i1 @slt_zero_ne_ne_0(i32 %a) {
 
 define <4 x i1> @slt_zero_eq_ne_0_vec(<4 x i32> %a) {
 ; CHECK-LABEL: @slt_zero_eq_ne_0_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
 ;
   %cmp = icmp ne <4 x i32> %a, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index ecf21b8a42cf50..d499aecd7327fb 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -22,7 +22,7 @@ define i32 @test1(i32 %X) {
 
 define <2 x i32> @test1vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test1vec(
-; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[X_LOBIT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[X_LOBIT]]
 ;
   %a = icmp slt <2 x i32> %X, zeroinitializer
@@ -43,7 +43,7 @@ define i32 @test2(i32 %X) {
 
 define <2 x i32> @test2vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test2vec(
-; CHECK-NEXT:    [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[A:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[B:%.*]] = zext <2 x i1> [[A]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
@@ -167,7 +167,7 @@ define i1 @test9(i32 %x) {
 
 define <2 x i1> @test9_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test9_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[B:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = add <2 x i32> %x, <i32 -2, i32 -2>
@@ -187,7 +187,7 @@ define i1 @test9b(i32 %x) {
 
 define <2 x i1> @test9b_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test9b_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = add <2 x i32> %x, <i32 -2, i32 -2>
@@ -207,7 +207,7 @@ define i1 @test10(i32 %x) {
 
 define <2 x i1> @test10_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test10_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[B:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = add <2 x i32> %x, <i32 -1, i32 -1>
@@ -227,7 +227,7 @@ define i1 @test10b(i32 %x) {
 
 define <2 x i1> @test10b_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test10b_vec(
-; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[B:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i1> [[B]]
 ;
   %a = add <2 x i32> %x, <i32 -1, i32 -1>
@@ -310,7 +310,7 @@ define i1 @test17(i32 %x) {
 
 define <2 x i1> @test17vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test17vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %x
@@ -332,7 +332,7 @@ define i1 @test17a(i32 %x) {
 
 define <2 x i1> @test17a_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test17a_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %x
@@ -354,7 +354,7 @@ define i1 @test18_eq(i32 %x) {
 
 define <2 x i1> @test18_eq_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test18_eq_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
 ;
   %sh = lshr <2 x i32> <i32 8, i32 8>, %x
@@ -376,7 +376,7 @@ define i1 @test18_ne(i32 %x) {
 
 define <2 x i1> @test18_ne_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test18_ne_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
 ;
   %sh = lshr <2 x i32> <i32 8, i32 8>, %x
@@ -398,7 +398,7 @@ define i1 @test19(i32 %x) {
 
 define <2 x i1> @test19vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %x
@@ -430,7 +430,7 @@ define i1 @test20(i32 %x) {
 
 define <2 x i1> @test20vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test20vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[TMP1]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %x
@@ -452,7 +452,7 @@ define i1 @test20a(i32 %x) {
 
 define <2 x i1> @test20a_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test20a_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %x
@@ -494,7 +494,7 @@ define i1 @test23(i32 %x) {
 
 define <2 x i1> @test23vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test23vec(
-; CHECK-NEXT:    [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 1328634634, i32 1328634634>
+; CHECK-NEXT:    [[I4:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 1328634634)
 ; CHECK-NEXT:    ret <2 x i1> [[I4]]
 ;
   %i3 = sdiv <2 x i32> %x, <i32 -1328634635, i32 -1328634635>
@@ -1138,7 +1138,7 @@ define i1 @test55(i32 %a) {
 
 define <2 x i1> @test55vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test55vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -123, i32 -123>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -123)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = sub <2 x i32> zeroinitializer, %a
@@ -1158,7 +1158,7 @@ define i1 @test56(i32 %a) {
 
 define <2 x i1> @test56vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test56vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -113, i32 -113>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -113)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %sub = sub <2 x i32> <i32 10, i32 10>, %a
@@ -1423,7 +1423,7 @@ define i1 @test67inverse(i32 %x) {
 
 define <2 x i1> @test67vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test67vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1434,7 +1434,7 @@ define <2 x i1> @test67vec(<2 x i32> %x) {
 
 define <2 x i1> @test67vec2(<2 x i32> %x) {
 ; CHECK-LABEL: @test67vec2(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1445,7 +1445,7 @@ define <2 x i1> @test67vec2(<2 x i32> %x) {
 
 define <2 x i1> @test67vecinverse(<2 x i32> %x) {
 ; CHECK-LABEL: @test67vecinverse(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 96, i32 96>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 96)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1480,7 +1480,7 @@ define i1 @test70(i32 %X) {
 
 define <2 x i1> @test70vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test70vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = add <2 x i32> %X, <i32 2, i32 2>
@@ -1516,7 +1516,7 @@ define i1 @icmp_sext8trunc(i32 %x) {
 define <2 x i1> @icmp_sext8trunc_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_sext8trunc_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 36, i8 36>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 36)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %trunc = trunc <2 x i32> %x to <2 x i8>
@@ -1552,7 +1552,7 @@ define i1 @icmp_shl17(i32 %x) {
 define <2 x i1> @icmp_shl16_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_shl16_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], <i16 36, i16 36>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i16> [[TMP1]], splat (i16 36)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> %x, <i32 16, i32 16>
@@ -1584,7 +1584,7 @@ define i1 @icmp_shl_eq(i32 %x) {
 
 define <2 x i1> @icmp_shl_eq_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_shl_eq_vec(
-; CHECK-NEXT:    [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
+; CHECK-NEXT:    [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 134217727)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[MUL_MASK]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1626,7 +1626,7 @@ define i1 @icmp_shl_ne(i32 %x) {
 
 define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_shl_ne_vec(
-; CHECK-NEXT:    [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 33554431, i32 33554431>
+; CHECK-NEXT:    [[MUL_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 33554431)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[MUL_MASK]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1637,7 +1637,7 @@ define <2 x i1> @icmp_shl_ne_vec(<2 x i32> %x) {
 
 define <2 x i1> @icmp_shl_nuw_ne_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_shl_nuw_ne_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl nuw <2 x i32> %x, <i32 7, i32 7>
@@ -1780,7 +1780,7 @@ define i1 @icmp_add20_eq_add57(i32 %x, i32 %y) {
 
 define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_add20_eq_add57_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 37, i32 37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1792,7 +1792,7 @@ define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_add20_eq_add57_poison(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_add20_eq_add57_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 37, i32 37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1828,7 +1828,7 @@ define i1 @icmp_sub57_ne_sub20(i32 %x, i32 %y) {
 
 define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_sub57_ne_sub20_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -37, i32 -37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1840,7 +1840,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -37, i32 -37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 -37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1852,7 +1852,7 @@ define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_sub57_ne_sub20_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_nonsplat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 37, i32 37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1916,7 +1916,7 @@ define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
 
 define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_add20_sge_add57_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 37, i32 37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1928,7 +1928,7 @@ define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_add20_sge_add57_poison(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_add20_sge_add57_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 37, i32 37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sge <2 x i32> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1965,7 +1965,7 @@ define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
 
 define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_sub57_sge_sub20_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 -37, i32 -37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -1977,7 +1977,7 @@ define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_sub57_sge_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_sub57_sge_sub20_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 -37, i32 -37>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -37)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
@@ -2086,8 +2086,8 @@ define i1 @icmp_add_and_shr_ne_0(i32 %X) {
 
 define <2 x i1> @icmp_add_and_shr_ne_0_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_add_and_shr_ne_0_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 240, i32 240>
-; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 224, i32 224>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 240)
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 224)
 ; CHECK-NEXT:    ret <2 x i1> [[TOBOOL]]
 ;
   %shr = lshr <2 x i32> %X, <i32 4, i32 4>
@@ -2292,7 +2292,7 @@ define i1 @icmp_shl_1_V_ult_32(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_ult_32_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_ult_32_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2312,7 +2312,7 @@ define i1 @icmp_shl_1_V_eq_32(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_eq_32_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_eq_32_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2332,7 +2332,7 @@ define i1 @icmp_shl_1_V_ult_30(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_ult_30_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_ult_30_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2352,7 +2352,7 @@ define i1 @icmp_shl_1_V_ugt_30(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_ugt_30_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_ugt_30_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2372,7 +2372,7 @@ define i1 @icmp_shl_1_V_ule_30(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_ule_30_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_ule_30_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2392,7 +2392,7 @@ define i1 @icmp_shl_1_V_uge_30(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_uge_30_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_uge_30_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[V:%.*]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2412,7 +2412,7 @@ define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_uge_2147483648_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2432,7 +2432,7 @@ define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_ult_2147483648_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2452,7 +2452,7 @@ define i1 @icmp_shl_1_V_sle_0(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_sle_0_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_sle_0_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2472,7 +2472,7 @@ define i1 @icmp_shl_1_V_sle_negative(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_sle_0_negative(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_sle_0_negative(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2492,7 +2492,7 @@ define i1 @icmp_shl_1_V_sgt_0(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_sgt_0_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_sgt_0_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2512,7 +2512,7 @@ define i1 @icmp_shl_1_V_sgt_negative(i32 %V) {
 
 define <2 x i1> @icmp_shl_1_V_sgt_negative_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @icmp_shl_1_V_sgt_negative_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[V:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 1, i32 1>, %V
@@ -2547,7 +2547,7 @@ define i1 @or_icmp_eq_B_0_icmp_ult_A_B_logical(i64 %a, i64 %b) {
 
 define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -2559,7 +2559,7 @@ define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> %b)
 
 define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_poison(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -2793,7 +2793,7 @@ define i1 @and_icmp_ne_B_0_icmp_uge_A_B_logical(i64 %a, i64 %b) {
 
 define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -2805,7 +2805,7 @@ define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b
 
 define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_poison(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <2 x i1> [[TMP2]]
 ;
@@ -2828,8 +2828,8 @@ define i1 @icmp_add_ult_2(i32 %X) {
 
 define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_add_X_-14_ult_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 14, i32 14>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 14)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = add <2 x i32> %X, <i32 -14, i32 -14>
@@ -2850,8 +2850,8 @@ define i1 @icmp_sub_3_X_ult_2(i32 %X) {
 
 define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = sub <2 x i32> <i32 3, i32 3>, %X
@@ -2872,8 +2872,8 @@ define i1 @icmp_add_X_-14_uge_2(i32 %X) {
 
 define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_add_X_-14_uge_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 14, i32 14>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 14)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = add <2 x i32> %X, <i32 -14, i32 -14>
@@ -2894,8 +2894,8 @@ define i1 @icmp_sub_3_X_uge_2(i32 %X) {
 
 define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -2, i32 -2>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -2)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %add = sub <2 x i32> <i32 3, i32 3>, %X
@@ -2915,7 +2915,7 @@ define i1 @icmp_and_X_-16_eq-16(i32 %X) {
 
 define <2 x i1> @icmp_and_X_-16_eq-16_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_and_X_-16_eq-16_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 -17, i32 -17>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -17)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i32> %X, <i32 -16, i32 -16>
@@ -2935,7 +2935,7 @@ define i1 @icmp_and_X_-16_ne-16(i32 %X) {
 
 define <2 x i1> @icmp_and_X_-16_ne-16_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_and_X_-16_ne-16_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -16)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %and = and <2 x i32> %X, <i32 -16, i32 -16>
@@ -2960,7 +2960,7 @@ define i1 @or1_eq1(i32 %x) {
 
 define <2 x i1> @or3_eq3_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @or3_eq3_vec(
-; CHECK-NEXT:    [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[T1:%.*]] = icmp ult <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[T1]]
 ;
   %t0 = or <2 x i8> %x, <i8 3, i8 3>
@@ -2984,7 +2984,7 @@ define i1 @or7_ne7(i32 %x) {
 
 define <2 x i1> @or63_ne63_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @or63_ne63_vec(
-; CHECK-NEXT:    [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 63, i8 63>
+; CHECK-NEXT:    [[T1:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 63)
 ; CHECK-NEXT:    ret <2 x i1> [[T1]]
 ;
   %t0 = or <2 x i8> %x, <i8 63, i8 63>
@@ -3010,7 +3010,7 @@ define i1 @orC_eqC(i32 %x) {
 
 define <2 x i1> @orC_eqC_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @orC_eqC_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 -44, i8 -44>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -44)
 ; CHECK-NEXT:    [[T1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T1]]
 ;
@@ -3036,7 +3036,7 @@ define i1 @orC_neC(i32 %x) {
 
 define <2 x i1> @orC_neC_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @orC_neC_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 42, i8 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 42)
 ; CHECK-NEXT:    [[T1:%.*]] = icmp ne <2 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T1]]
 ;
@@ -3058,8 +3058,8 @@ define i1 @shrink_constant(i32 %X) {
 
 define <2 x i1> @shrink_constant_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @shrink_constant_vec(
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -12, i32 -12>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], <i32 4, i32 4>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -12)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[XOR]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %xor = xor <2 x i32> %X, <i32 -9, i32 -9>
@@ -3080,7 +3080,7 @@ define i1 @icmp_sub_-1_X_ult_4(i32 %X) {
 
 define <2 x i1> @icmp_xor_neg4_X_ult_4_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_xor_neg4_X_ult_4_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 -5, i32 -5>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %xor = xor <2 x i32> %X, <i32 -4, i32 -4>
@@ -3100,7 +3100,7 @@ define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
 
 define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @icmp_xor_neg4_X_uge_4_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 -4, i32 -4>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 -4)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %xor = xor <2 x i32> %X, <i32 -4, i32 -4>
@@ -3110,7 +3110,7 @@ define <2 x i1> @icmp_xor_neg4_X_uge_4_vec(<2 x i32> %X) {
 
 define <2 x i1> @xor_ult(<2 x i8> %x) {
 ; CHECK-LABEL: @xor_ult(
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %xor = xor <2 x i8> %x, <i8 -4, i8 -4>
@@ -3133,7 +3133,7 @@ define i1 @xor_ult_extra_use(i8 %x, ptr %p) {
 
 define <2 x i1> @xor_ugt(<2 x i8> %x) {
 ; CHECK-LABEL: @xor_ugt(
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %xor = xor <2 x i8> %x, <i8 7, i8 7>
@@ -3219,7 +3219,7 @@ define <2 x i1> @icmp_and_or_lshr_vec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i1> @icmp_and_or_lshr_vec_commute(<2 x i32> %xp, <2 x i32> %y) {
 ; CHECK-LABEL: @icmp_and_or_lshr_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[SHF:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]]
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[X]], [[SHF]]
 ; CHECK-NEXT:    [[RET:%.*]] = trunc <2 x i32> [[OR]] to <2 x i1>
@@ -3248,7 +3248,7 @@ define i1 @icmp_and_or_lshr_cst(i32 %x) {
 
 define <2 x i1> @icmp_and_or_lshr_cst_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
@@ -3287,8 +3287,8 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_poison(<2 x i32> %x) {
 
 define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 3>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], splat (i32 3)
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
 ;
@@ -3302,7 +3302,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_commute(<2 x i32> %xp) {
 
 define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_nonuniform_commute(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 5>
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
@@ -3317,7 +3317,7 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) {
 
 define <2 x i1> @icmp_and_or_lshr_cst_vec_poison_commute(<2 x i32> %xp) {
 ; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_poison_commute(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> [[XP:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 poison>
 ; CHECK-NEXT:    [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[RET]]
@@ -3342,7 +3342,7 @@ define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
 
 define <2 x i1> @shl_ap1_zero_ap2_non_zero_2_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 29, i32 29>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 29)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i32> <i32 4, i32 4>, %a
@@ -3705,8 +3705,8 @@ define i1 @cmp_inverse_mask_bits_set_eq(i32 %x) {
 
 define <2 x i1> @cmp_inverse_mask_bits_set_eq_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 -43, i32 -43>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 -43, i32 -43>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -43)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 -43)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %or = or <2 x i32> %x, <i32 42, i32 42>
@@ -3776,8 +3776,8 @@ define i1 @ugtKnownBits(i8 %a) {
 
 define <2 x i1> @ugtKnownBitsVec(<2 x i8> %a) {
 ; CHECK-LABEL: @ugtKnownBitsVec(
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[A:%.*]], <i8 17, i8 17>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[B]], <i8 17, i8 17>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 17)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[B]], splat (i8 17)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %b = and <2 x i8> %a, <i8 17, i8 17>
@@ -4055,9 +4055,9 @@ define i1 @knownbits3(i8 %a, i8 %b) {
 
 define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @knownbits4(
-; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -4105,9 +4105,9 @@ define i1 @knownbits6(i8 %a, i8 %b) {
 
 define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @knownbits7(
-; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[A1:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127)
+; CHECK-NEXT:    [[B1:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], splat (i8 1)
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -4226,7 +4226,7 @@ define i1 @signbit_bitcast_fptrunc(float %x) {
 define <2 x i1> @signbit_bitcast_fptrunc_vec(<2 x double> %x) {
 ; CHECK-LABEL: @signbit_bitcast_fptrunc_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x double> [[X:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %f = fptrunc <2 x double> %x to <2 x half>
@@ -4252,7 +4252,7 @@ define <4 x i1> @signbit_bitcast_fpext_vec_wrong_bitcast(<2 x half> %x) {
 ; CHECK-LABEL: @signbit_bitcast_fpext_vec_wrong_bitcast(
 ; CHECK-NEXT:    [[F:%.*]] = fpext <2 x half> [[X:%.*]] to <2 x float>
 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x float> [[F]] to <4 x i16>
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i16> [[B]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i16> [[B]], splat (i16 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %f = fpext <2 x half> %x to <2 x float>
@@ -4522,9 +4522,9 @@ define i1 @redundant_sign_bit_count_i8(i8 %x) {
 
 define <2 x i1> @redundant_sign_bit_count_ult_31_30_vector(<2 x i32> %xsrc) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_vector(
-; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], <i32 13, i32 13>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X]], <i32 1073741824, i32 1073741824>
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], splat (i32 13)
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X]], splat (i32 1073741824)
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %x = mul <2 x i32> %xsrc, <i32 13, i32 13> ; thwart complexity-based canonicalization
@@ -4598,7 +4598,7 @@ define i1 @zext_bool_and_eq0(i1 %x, i8 %y) {
 
 define <2 x i1> @zext_bool_and_eq0_commute(<2 x i1> %x, <2 x i8> %p) {
 ; CHECK-LABEL: @zext_bool_and_eq0_commute(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[R1:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[NOT_X:%.*]] = xor <2 x i1> [[X:%.*]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[NOT_X]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[R1]]
@@ -4728,7 +4728,7 @@ define i1 @or_positive_sgt_zero(i8 %a) {
 
 define <2 x i1> @or_postive_sgt_zero_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_sgt_zero_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -4759,7 +4759,7 @@ define i1 @or_positive_sge_zero(i8 %a) {
 
 define <2 x i1> @or_postive_sge_zero_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_sge_zero_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -4790,7 +4790,7 @@ define i1 @or_positive_sge_postive(i8 %a) {
 
 define <2 x i1> @or_postive_sge_positive_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_sge_positive_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -4916,7 +4916,7 @@ define i1 @or_positive_sgt_neg(i8 %a) {
 
 define <2 x i1> @or_postive_sgt_neg_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_sgt_neg_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[A:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -4938,8 +4938,8 @@ define i1 @mul_or_positive_sge_neg(i8 %a) {
 
 define <2 x i1> @or_postive_sge_neg_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_sge_neg_vec(
-; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 24, i8 24>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], <i8 -2, i8 -2>
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 -2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -4961,8 +4961,8 @@ define i1 @mul_or_small_sge_large(i8 %a) {
 
 define <2 x i1> @or_small_sge_large_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_small_sge_large_vec(
-; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 24, i8 24>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], <i8 24, i8 24>
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[B]], splat (i8 24)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -5005,8 +5005,8 @@ define i1 @or_positive_slt_neg(i8 %a) {
 
 define <2 x i1> @or_postive_slt_neg_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_postive_slt_neg_vec(
-; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 24, i8 24>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[B]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 -1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -5028,8 +5028,8 @@ define i1 @or_small_slt_large(i8 %a) {
 
 define <2 x i1> @or_small_slt_large_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @or_small_slt_large_vec(
-; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 24, i8 24>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[B]], <i8 25, i8 25>
+; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 24)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[B]], splat (i8 25)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
 
@@ -5289,7 +5289,7 @@ define i1 @test_icmp_shl_nsw_i31(i31 %x) {
 define <2 x i1> @test_icmp_shl_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @test_icmp_shl_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %shl = shl <2 x i64> %x, splat(i64 32)
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
index 6cbb2a246f5a4a..8bc915e695aa75 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
@@ -725,7 +725,7 @@ define <4 x float> @insert_demanded_element_op1(<4 x float> %x, <4 x float> %y)
 define <4 x float> @splat_constant(<4 x float> %x) {
 ; CHECK-LABEL: @splat_constant(
 ; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3
-; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[INS3]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %ins3 = insertelement <4 x float> %x, float 3.0, i32 3
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index c87e2e8596c62d..f51e444a815c80 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -781,7 +781,7 @@ define <5 x float> @insert_demanded_element_unequal_length_op1(<4 x float> %x, <
 define <4 x float> @splat_constant(<4 x float> %x) {
 ; CHECK-LABEL: @splat_constant(
 ; CHECK-NEXT:    [[INS3:%.*]] = insertelement <4 x float> [[X:%.*]], float 3.000000e+00, i64 3
-; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[INS3]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[R:%.*]] = fadd <4 x float> [[INS3]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    ret <4 x float> [[R]]
 ;
   %ins3 = insertelement <4 x float> %x, float 3.0, i32 3
diff --git a/llvm/test/Transforms/InstCombine/insertelement.ll b/llvm/test/Transforms/InstCombine/insertelement.ll
index c8df2db6e70caa..72dcd929ba0cb8 100644
--- a/llvm/test/Transforms/InstCombine/insertelement.ll
+++ b/llvm/test/Transforms/InstCombine/insertelement.ll
@@ -15,7 +15,7 @@ define <4 x i32> @insert_known_idx(<4 x i32> %x) {
 
 define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) {
 ; CHECK-LABEL: @insert_unknown_idx(
-; CHECK-NEXT:    [[V1:%.*]] = and <4 x i32> [[X:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[V1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[V2:%.*]] = insertelement <4 x i32> [[V1]], i32 6, i32 [[IDX:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[V2]]
 ;
@@ -27,7 +27,7 @@ define <4 x i32> @insert_unknown_idx(<4 x i32> %x, i32 %idx) {
 
 define <2 x i8> @insert_known_any_idx(<2 x i8> %xx, i8 %yy, i32 %idx) {
 ; CHECK-LABEL: @insert_known_any_idx(
-; CHECK-NEXT:    ret <2 x i8> <i8 16, i8 16>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 16)
 ;
   %x = or <2 x i8> %xx, <i8 16, i8 16>
   %y = or i8 %yy, 16
@@ -42,7 +42,7 @@ define <2 x i8> @insert_known_any_idx_fail1(<2 x i8> %xx, i8 %yy, i32 %idx) {
 ; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 17, i8 33>
 ; CHECK-NEXT:    [[Y:%.*]] = or i8 [[YY:%.*]], 16
 ; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[INS]], <i8 16, i8 16>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x = or <2 x i8> %xx, <i8 17, i8 33>
@@ -59,7 +59,7 @@ define <2 x i8> @insert_known_any_idx_fail2(<2 x i8> %xx, i8 %yy, i32 %idx) {
 ; CHECK-NEXT:    [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 17, i8 31>
 ; CHECK-NEXT:    [[Y:%.*]] = or i8 [[YY:%.*]], 15
 ; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x i8> [[X]], i8 [[Y]], i32 [[IDX:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[INS]], <i8 16, i8 16>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[INS]], splat (i8 16)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x = or <2 x i8> %xx, <i8 17, i8 31>
diff --git a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
index afd56abf40a507..652bdc21fad2eb 100644
--- a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
+++ b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll
@@ -74,8 +74,8 @@ define i8 @t3_commutative(i8 %x) {
 ; Basic splat vector test
 define <2 x i8> @t4_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_splat(
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16)
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 15>
@@ -89,8 +89,8 @@ define <2 x i8> @t4_splat(<2 x i8> %x) {
 ; Splat-with-poison
 define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_poison_0b0001(
-; CHECK-NEXT:    [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16)
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 15>
@@ -102,8 +102,8 @@ define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_poison_0b0010(
-; CHECK-NEXT:    [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], splat (i8 -16)
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 15>
@@ -115,8 +115,8 @@ define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_poison_0b0100(
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16)
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 15>
@@ -128,8 +128,8 @@ define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) {
 }
 define <2 x i8> @t5_splat_poison_0b1000(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_splat_poison_0b1000(
-; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15>
-; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 15)
+; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16)
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
   %x.lowbits = and <2 x i8> %x, <i8 15, i8 poison>
@@ -161,10 +161,10 @@ define <2 x i8> @t6_nonsplat(<2 x i8> %x) {
 ; Even if the alignment (and masks) are splat, the bias could be non-splat
 define <2 x i8> @t7_nonsplat_bias(<2 x i8> %x) {
 ; CHECK-LABEL: @t7_nonsplat_bias(
-; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 15>
+; CHECK-NEXT:    [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 15)
 ; CHECK-NEXT:    [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer
 ; CHECK-NEXT:    [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 15, i8 16>
-; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], splat (i8 -16)
 ; CHECK-NEXT:    [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]]
 ; CHECK-NEXT:    ret <2 x i8> [[X_ROUNDEDUP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
index f110d7765830ef..4ce2908a630785 100644
--- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
@@ -297,7 +297,7 @@ define double @test_fabs_select1(double %a) {
 define <2 x double> @test_fabs_select1_vec(<2 x double> %a) {
 ; CHECK-LABEL: @test_fabs_select1_vec(
 ; CHECK-NEXT:    [[COND:%.*]] = fcmp uno <2 x double> [[A:%.*]], zeroinitializer
-; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, <2 x double> [[A]]
+; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[COND]], <2 x double> splat (double 0x7FF8000000000000), <2 x double> [[A]]
 ; CHECK-NEXT:    ret <2 x double> [[SEL2]]
 ;
   %cond = fcmp uno <2 x double> %a, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll
index c8d70e17cd3921..afc4c390c28f8b 100644
--- a/llvm/test/Transforms/InstCombine/intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -62,7 +62,7 @@ define i32 @cttz(i32 %a) {
 
 define <2 x i32> @cttz_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @cttz_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 3, i32 3>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 3)
 ;
   %or = or <2 x i32> %a, <i32 8, i32 8>
   %and = and <2 x i32> %or, <i32 -8, i32 -8>
@@ -149,7 +149,7 @@ define i32 @cttz_knownbits2(i32 %arg) {
 
 define <2 x i32> @cttz_knownbits2_vec(<2 x i32> %arg) {
 ; CHECK-LABEL: @cttz_knownbits2_vec(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 0, 3) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) #[[ATTR2]]
 ; CHECK-NEXT:    ret <2 x i32> [[CNT]]
 ;
@@ -190,7 +190,7 @@ define i8 @ctlz(i8 %a) {
 
 define <2 x i8> @ctlz_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @ctlz_vec(
-; CHECK-NEXT:    ret <2 x i8> <i8 2, i8 2>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 2)
 ;
   %or = or <2 x i8> %a, <i8 32, i8 32>
   %and = and <2 x i8> %or, <i8 63, i8 63>
@@ -266,7 +266,7 @@ define i8 @ctlz_knownbits2(i8 %arg) {
 
 define <2 x i8> @ctlz_knownbits2_vec(<2 x i8> %arg) {
 ; CHECK-LABEL: @ctlz_knownbits2_vec(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], <i8 32, i8 32>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], splat (i8 32)
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i8 0, 3) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) #[[ATTR2]]
 ; CHECK-NEXT:    ret <2 x i8> [[CNT]]
 ;
@@ -324,7 +324,7 @@ define i32 @ctlz_no_zero(i32 %a) {
 
 define <2 x i32> @ctlz_no_zero_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @ctlz_no_zero_vec(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call range(i32 0, 29) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
 ;
@@ -362,7 +362,7 @@ define i32 @cttz_no_zero(i32 %a) {
 
 define <2 x i32> @cttz_no_zero_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @cttz_no_zero_vec(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[CTTZ:%.*]] = tail call range(i32 0, 4) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/is_fpclass.ll b/llvm/test/Transforms/InstCombine/is_fpclass.ll
index 24a331dfb48bb0..c1809b8bec61cb 100644
--- a/llvm/test/Transforms/InstCombine/is_fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/is_fpclass.ll
@@ -504,7 +504,7 @@ define i1 @test_class_is_pinf_or_nan_f32(float %x) {
 
 define <2 x i1> @test_class_is_pinf_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @test_class_is_pinf_v2f32(
-; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0x7FF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[VAL]]
 ;
   %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 512) ; fcPosInf
@@ -531,7 +531,7 @@ define i1 @test_class_is_ninf_or_nan_f32(float %x) {
 
 define <2 x i1> @test_class_is_ninf_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @test_class_is_ninf_v2f32(
-; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], <float 0xFFF0000000000000, float 0xFFF0000000000000>
+; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[X:%.*]], splat (float 0xFFF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[VAL]]
 ;
   %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 4) ; fcNegInf
@@ -551,7 +551,7 @@ define i1 @test_class_is_inf_f32(float %x) {
 define <2 x i1> @test_class_is_inf_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @test_class_is_inf_v2f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X:%.*]])
-; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    [[VAL:%.*]] = fcmp oeq <2 x float> [[TMP1]], splat (float 0x7FF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[VAL]]
 ;
   %val = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 516) ; fcInf
@@ -1300,7 +1300,7 @@ define i1 @test_no_fold_and_class_f32_0(float %a, float %b) {
 
 define <2 x i1> @test_fold_and_class_v2f32(<2 x float> %a) {
 ; CHECK-LABEL: @test_fold_and_class_v2f32(
-; CHECK-NEXT:    [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], <float 0xFFF0000000000000, float 0xFFF0000000000000>
+; CHECK-NEXT:    [[CLASS1:%.*]] = fcmp ueq <2 x float> [[A:%.*]], splat (float 0xFFF0000000000000)
 ; CHECK-NEXT:    ret <2 x i1> [[CLASS1]]
 ;
   %class0 = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %a, i32 7)
diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
index 348508769c581f..9d42a439d3888c 100644
--- a/llvm/test/Transforms/InstCombine/ispow2.ll
+++ b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -16,7 +16,7 @@ define i1 @is_pow2or0_negate_op(i32 %x) {
 define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @is_pow2or0_negate_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -40,7 +40,7 @@ define i1 @is_pow2or0_decrement_op(i8 %x) {
 define <2 x i1> @is_pow2or0_decrement_op_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @is_pow2or0_decrement_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[TMP1]], <i8 2, i8 2>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[TMP1]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %dec = add <2 x i8> %x, <i8 -1, i8 -1>
@@ -64,7 +64,7 @@ define i1 @isnot_pow2or0_negate_op(i32 %x) {
 define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -88,7 +88,7 @@ define i1 @isnot_pow2or0_decrement_op(i8 %x) {
 define <2 x i1> @isnot_pow2or0_decrement_op_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @isnot_pow2or0_decrement_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %dec = add <2 x i8> %x, <i8 -1, i8 -1>
@@ -312,7 +312,7 @@ define i1 @is_pow2_ctpop_extra_uses_logical(i32 %x) {
 define <2 x i1> @is_pow2_ctpop_commute_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @is_pow2_ctpop_commute_vec(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[T0]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[T0]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
@@ -519,7 +519,7 @@ define i1 @isnot_pow2_ctpop_extra_uses_logical(i32 %x) {
 define <2 x i1> @isnot_pow2_ctpop_commute_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @isnot_pow2_ctpop_commute_vec(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[T0]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[T0]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
@@ -656,7 +656,7 @@ define i1 @is_pow2_negate_op_logical(i32 %x) {
 define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @is_pow2_negate_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -698,7 +698,7 @@ define i1 @is_pow2_decrement_op_logical(i8 %x) {
 define <2 x i1> @is_pow2_decrement_op_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @is_pow2_decrement_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %dec = add <2 x i8> %x, <i8 -1, i8 -1>
@@ -740,7 +740,7 @@ define i1 @isnot_pow2_negate_op_logical(i32 %x) {
 define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @isnot_pow2_negate_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i32 0, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %x
@@ -782,7 +782,7 @@ define i1 @isnot_pow2_decrement_op_logical(i8 %x) {
 define <2 x i1> @isnot_pow2_decrement_op_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @isnot_pow2_decrement_op_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %dec = add <2 x i8> %x, <i8 -1, i8 -1>
@@ -837,7 +837,7 @@ define i1 @is_pow2or0_ctpop_logical(i32 %x) {
 define <2 x i1> @is_pow2or0_ctpop_commute_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @is_pow2or0_ctpop_commute_vec(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T0]], <i8 2, i8 2>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T0]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
@@ -1030,7 +1030,7 @@ define i1 @isnot_pow2nor0_ctpop_logical(i32 %x) {
 define <2 x i1> @isnot_pow2nor0_ctpop_commute_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @isnot_pow2nor0_ctpop_commute_vec(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctpop.v2i8(<2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[T0]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[T0]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t0 = tail call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
@@ -1492,7 +1492,7 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) {
 
 define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) {
 ; CHECK-LABEL: @not_pow2_or_z_known_bits(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], <i32 -65, i32 -65>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], splat (i32 -65)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
@@ -1504,9 +1504,9 @@ define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) {
 
 define <2 x i1> @not_pow2_or_z_known_bits_fail_wrong_cmp(<2 x i32> %xin) {
 ; CHECK-LABEL: @not_pow2_or_z_known_bits_fail_wrong_cmp(
-; CHECK-NEXT:    [[X:%.*]] = or <2 x i32> [[XIN:%.*]], <i32 64, i32 64>
+; CHECK-NEXT:    [[X:%.*]] = or <2 x i32> [[XIN:%.*]], splat (i32 64)
 ; CHECK-NEXT:    [[CNT:%.*]] = call range(i32 1, 33) <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], <i32 2, i32 2>
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %x = or <2 x i32> %xin, <i32 64, i32 64>
diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll
index aa24ec3905f133..c9e7cbae3d9395 100644
--- a/llvm/test/Transforms/InstCombine/known-bits.ll
+++ b/llvm/test/Transforms/InstCombine/known-bits.ll
@@ -939,10 +939,10 @@ define i1 @extract_value_uadd_fail2(<2 x i8> %xx, <2 x i8> %yy, i32 %idx) {
 
 define i1 @extract_value_uadd_fail3(<2 x i8> %xx, <2 x i8> %yy) {
 ; CHECK-LABEL: @extract_value_uadd_fail3(
-; CHECK-NEXT:    [[X0:%.*]] = and <2 x i8> [[XX:%.*]], <i8 127, i8 127>
-; CHECK-NEXT:    [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], <i8 127, i8 127>
-; CHECK-NEXT:    [[X:%.*]] = add nuw <2 x i8> [[X0]], <i8 1, i8 1>
-; CHECK-NEXT:    [[Y:%.*]] = add nuw <2 x i8> [[Y0]], <i8 1, i8 1>
+; CHECK-NEXT:    [[X0:%.*]] = and <2 x i8> [[XX:%.*]], splat (i8 127)
+; CHECK-NEXT:    [[Y0:%.*]] = and <2 x i8> [[YY:%.*]], splat (i8 127)
+; CHECK-NEXT:    [[X:%.*]] = add nuw <2 x i8> [[X0]], splat (i8 1)
+; CHECK-NEXT:    [[Y:%.*]] = add nuw <2 x i8> [[Y0]], splat (i8 1)
 ; CHECK-NEXT:    [[ADD_UOV:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[X]], <2 x i8> [[Y]])
 ; CHECK-NEXT:    [[ADD:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 0
 ; CHECK-NEXT:    [[UOV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[ADD_UOV]], 1
diff --git a/llvm/test/Transforms/InstCombine/ldexp-ext.ll b/llvm/test/Transforms/InstCombine/ldexp-ext.ll
index 58710005d6cce0..3fac72a7858e48 100644
--- a/llvm/test/Transforms/InstCombine/ldexp-ext.ll
+++ b/llvm/test/Transforms/InstCombine/ldexp-ext.ll
@@ -47,7 +47,7 @@ define double @ldexp_zext_double_fast_math(double %x, i1 %bool) {
 
 define <2 x float> @ldexp_zext_float_vector(<2 x float> %x, <2 x i1> %bool) {
 ; CHECK-LABEL: @ldexp_zext_float_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[LDEXP]]
 ;
@@ -102,7 +102,7 @@ define double @ldexp_sext_double_fast_math(double %x, i1 %bool) {
 
 define <2 x float> @ldexp_sext_float_vector(<2 x float> %x, <2 x i1> %bool) {
 ; CHECK-LABEL: @ldexp_sext_float_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[BOOL:%.*]], <2 x float> splat (float 5.000000e-01), <2 x float> splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[LDEXP:%.*]] = fmul <2 x float> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x float> [[LDEXP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/ldexp.ll b/llvm/test/Transforms/InstCombine/ldexp.ll
index 7ae483cbda0e22..8908d476b4a2c0 100644
--- a/llvm/test/Transforms/InstCombine/ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/ldexp.ll
@@ -830,7 +830,7 @@ define float @ldexp_127(float %x) {
 define <2 x float> @ldexp_3_vector(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @ldexp_3_vector
 ; CHECK-SAME: (<2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> <i32 3, i32 3>)
+; CHECK-NEXT:    [[LDEXP:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> splat (i32 3))
 ; CHECK-NEXT:    ret <2 x float> [[LDEXP]]
 ;
   %ldexp = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> <i32 3, i32 3>)
diff --git a/llvm/test/Transforms/InstCombine/load-store-forward.ll b/llvm/test/Transforms/InstCombine/load-store-forward.ll
index dbc68044c11a0d..b3fa3dae80379f 100644
--- a/llvm/test/Transforms/InstCombine/load-store-forward.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-forward.ll
@@ -347,7 +347,7 @@ define i1 @load_after_memset_1_i1(ptr %a) {
 define <4 x i8> @load_after_memset_1_vec(ptr %a) {
 ; CHECK-LABEL: @load_after_memset_1_vec(
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(16) [[A:%.*]], i8 1, i64 16, i1 false)
-; CHECK-NEXT:    ret <4 x i8> <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    ret <4 x i8> splat (i8 1)
 ;
   call void @llvm.memset.p0.i64(ptr %a, i8 1, i64 16, i1 false)
   %v = load <4 x i8>, ptr %a
diff --git a/llvm/test/Transforms/InstCombine/log-pow.ll b/llvm/test/Transforms/InstCombine/log-pow.ll
index bfa636e470bb45..374115953145d8 100644
--- a/llvm/test/Transforms/InstCombine/log-pow.ll
+++ b/llvm/test/Transforms/InstCombine/log-pow.ll
@@ -137,7 +137,7 @@ define double @log10_exp(double %x) {
 
 define <2 x float> @logv_exp2v(<2 x float> %x) {
 ; CHECK-LABEL: @logv_exp2v(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 0x3FE62E4300000000, float 0x3FE62E4300000000>
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 0x3FE62E4300000000)
 ; CHECK-NEXT:    ret <2 x float> [[MUL]]
 ;
   %exp = call fast <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
index cf0dc350328846..9e0c98bb340068 100644
--- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll
@@ -345,7 +345,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6
 ; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
 ; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
-; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1)
 ; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]]
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll
index 62a63839704a44..4054c50efc2a13 100644
--- a/llvm/test/Transforms/InstCombine/logical-select.ll
+++ b/llvm/test/Transforms/InstCombine/logical-select.ll
@@ -350,7 +350,7 @@ define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i6
 ; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
 ; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
-; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1)
 ; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]]
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
@@ -830,7 +830,7 @@ define <2 x i16> @bitcast_vec_cond_commute3(<4 x i8> %cond, <2 x i16> %pc, <2 x
 define <2 x i64> @bitcast_fp_vec_cond(<2 x double> noundef %s, <2 x i64> %c, <2 x i64> %d) {
 ; CHECK-LABEL: @bitcast_fp_vec_cond(
 ; CHECK-NEXT:    [[T9:%.*]] = bitcast <2 x double> [[S:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[NOTT9:%.*]] = xor <2 x i64> [[T9]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1)
 ; CHECK-NEXT:    [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]]
 ; CHECK-NEXT:    [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]]
 ; CHECK-NEXT:    [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]]
@@ -850,7 +850,7 @@ define <2 x i64> @bitcast_int_vec_cond(i1 noundef %b, <2 x i64> %c, <2 x i64> %d
 ; CHECK-LABEL: @bitcast_int_vec_cond(
 ; CHECK-NEXT:    [[S:%.*]] = sext i1 [[B:%.*]] to i128
 ; CHECK-NEXT:    [[T9:%.*]] = bitcast i128 [[S]] to <2 x i64>
-; CHECK-NEXT:    [[NOTT9:%.*]] = xor <2 x i64> [[T9]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[NOTT9:%.*]] = xor <2 x i64> [[T9]], splat (i64 -1)
 ; CHECK-NEXT:    [[T11:%.*]] = and <2 x i64> [[C:%.*]], [[NOTT9]]
 ; CHECK-NEXT:    [[T12:%.*]] = and <2 x i64> [[D:%.*]], [[T9]]
 ; CHECK-NEXT:    [[R:%.*]] = or disjoint <2 x i64> [[T11]], [[T12]]
diff --git a/llvm/test/Transforms/InstCombine/low-bit-splat.ll b/llvm/test/Transforms/InstCombine/low-bit-splat.ll
index a86ab0359afd78..ac069fb138dbe0 100644
--- a/llvm/test/Transforms/InstCombine/low-bit-splat.ll
+++ b/llvm/test/Transforms/InstCombine/low-bit-splat.ll
@@ -31,7 +31,7 @@ define i16 @t1_otherbitwidth(i16 %x) {
 ; Basic positive vector tests
 define <2 x i8> @t2_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @t2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
index 89522a00d78949..bf38a4707fc785 100644
--- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll
@@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_negC_ne(i32 %x, i32 %y) {
 define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_negC_eq(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison1(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -96,7 +96,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison2(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -108,7 +108,7 @@ define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_lshr_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_lshr_and_negC_eq_poison3(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
index 0166680309ea8c..da3ed49c83d4ff 100644
--- a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll
@@ -72,7 +72,7 @@ define i1 @scalar_i32_lshr_and_signbit_ne(i32 %x, i32 %y) {
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison1(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison2(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
@@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %
 define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison3(
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %lshr = lshr <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
index a77ad3a7ea74ee..03188c0c590c43 100644
--- a/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
+++ b/llvm/test/Transforms/InstCombine/lshr-trunc-sext-to-ashr-sext.ll
@@ -64,7 +64,7 @@ define i16 @n3(i8 %x) {
 
 define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t4_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
@@ -76,7 +76,7 @@ define <2 x i16> @t4_vec_splat(<2 x i8> %x) {
 
 define <2 x i16> @t5_vec_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @t5_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
@@ -143,7 +143,7 @@ define <2 x i16> @t9_extrause1_vec_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @t9_extrause1_vec_poison(
 ; CHECK-NEXT:    [[A:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 4, i8 poison>
 ; CHECK-NEXT:    call void @usevec8(<2 x i8> [[A]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X]], <i8 4, i8 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X]], splat (i8 4)
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
 ;
@@ -188,7 +188,7 @@ define <2 x i16> @t11_extrause2_vec_poison(<2 x i8> %x) {
 
 define <2 x i10> @wide_source_shifted_signbit(<2 x i32> %x) {
 ; CHECK-LABEL: @wide_source_shifted_signbit(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 24)
 ; CHECK-NEXT:    [[C:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i10>
 ; CHECK-NEXT:    ret <2 x i10> [[C]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/lshr.ll b/llvm/test/Transforms/InstCombine/lshr.ll
index 4360714c78caa6..4c38b1083d6489 100644
--- a/llvm/test/Transforms/InstCombine/lshr.ll
+++ b/llvm/test/Transforms/InstCombine/lshr.ll
@@ -73,7 +73,7 @@ define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
 
 define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_ctpop_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
@@ -153,8 +153,8 @@ define i8 @lshr_exact(i8 %x) {
 
 define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_exact_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[LSHR:%.*]] = and <2 x i8> [[TMP1]], <i8 63, i8 63>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[LSHR:%.*]] = and <2 x i8> [[TMP1]], splat (i8 63)
 ; CHECK-NEXT:    ret <2 x i8> [[LSHR]]
 ;
   %shl = shl <2 x i8> %x, <i8 2, i8 2>
@@ -165,7 +165,7 @@ define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
 
 define <2 x i8> @lshr_exact_splat_vec_nuw(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_exact_splat_vec_nuw(
-; CHECK-NEXT:    [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[LSHR:%.*]] = add nuw <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[LSHR]]
 ;
   %shl = shl nuw <2 x i8> %x, <i8 2, i8 2>
@@ -190,9 +190,9 @@ define i8 @shl_add(i8 %x, i8 %y) {
 define <2 x i8> @shl_add_commute_vec(<2 x i8> %x, <2 x i8> %py) {
 ; CHECK-LABEL: @shl_add_commute_vec(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], [[PY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[Y]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[Y]], splat (i8 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP2]], <i8 31, i8 31>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP2]], splat (i8 31)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %y = mul <2 x i8> %py, %py ; thwart complexity-based canonicalization
@@ -337,7 +337,7 @@ define <2 x i8> @fake_sext_splat(<2 x i3> %x) {
 
 define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @narrow_lshr_constant(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[SH:%.*]] = zext nneg <2 x i8> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[SH]]
 ;
@@ -851,7 +851,7 @@ define i12 @trunc_sandwich(i32 %x) {
 
 define <2 x i12> @trunc_sandwich_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @trunc_sandwich_splat_vec(
-; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    [[R1:%.*]] = trunc nuw nsw <2 x i32> [[SUM_SHIFT]] to <2 x i12>
 ; CHECK-NEXT:    ret <2 x i12> [[R1]]
 ;
@@ -1269,7 +1269,7 @@ define <2 x i64> @narrow_bswap_splat_poison_elt(<2 x i16> %x) {
 define <2 x i64> @narrow_bswap_overshift(<2 x i32> %x) {
 ; CHECK-LABEL: @narrow_bswap_overshift(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 16)
 ; CHECK-NEXT:    [[S:%.*]] = zext nneg <2 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[S]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-add.ll b/llvm/test/Transforms/InstCombine/masked-merge-add.ll
index 5ef53ad5150137..d78a9ee59c714b 100644
--- a/llvm/test/Transforms/InstCombine/masked-merge-add.ll
+++ b/llvm/test/Transforms/InstCombine/masked-merge-add.ll
@@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) {
 define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) {
 ; CHECK-LABEL: @p_splatvec(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1)
 ; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]]
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
@@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask2_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll
index 639478dfcc6fe8..399d6d4ac07d3f 100644
--- a/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll
+++ b/llvm/test/Transforms/InstCombine/masked-merge-and-of-ors.ll
@@ -31,7 +31,7 @@ define i32 @p(i32 %x, i32 %y, i32 %m) {
 
 define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> %m) {
 ; CHECK-LABEL: @p_splatvec(
-; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[X:%.*]], [[NEG]]
 ; CHECK-NEXT:    [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], [[M]]
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]]
@@ -78,8 +78,8 @@ define i32 @p_constmask(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask_splatvec(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[X:%.*]], <i32 -65281, i32 -65281>
-; CHECK-NEXT:    [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], <i32 65280, i32 65280>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -65281)
+; CHECK-NEXT:    [[OR1:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 65280)
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[OR]], [[OR1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-or.ll b/llvm/test/Transforms/InstCombine/masked-merge-or.ll
index dd2ac6dfe51091..5e41279dd26b90 100644
--- a/llvm/test/Transforms/InstCombine/masked-merge-or.ll
+++ b/llvm/test/Transforms/InstCombine/masked-merge-or.ll
@@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) {
 define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) {
 ; CHECK-LABEL: @p_splatvec(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1)
 ; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]]
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
@@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask2_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll
index 7ed1f3fdfdab64..463f220fcbb6dd 100644
--- a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll
+++ b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll
@@ -34,7 +34,7 @@ define i32 @p(i32 %x, i32 %y, i32 noundef %m) {
 define <2 x i32> @p_splatvec(<2 x i32> %x, <2 x i32> %y, <2 x i32> noundef %m) {
 ; CHECK-LABEL: @p_splatvec(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], [[M:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NEG:%.*]] = xor <2 x i32> [[M]], splat (i32 -1)
 ; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], [[NEG]]
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
@@ -95,8 +95,8 @@ define i32 @p_constmask(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 65280, i32 65280>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 65280)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -151,8 +151,8 @@ define i32 @p_constmask2(i32 %x, i32 %y) {
 
 define <2 x i32> @p_constmask2_splatvec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p_constmask2_splatvec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 61440, i32 61440>
-; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], <i32 -65281, i32 -65281>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 61440)
+; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i32> [[Y:%.*]], splat (i32 -65281)
 ; CHECK-NEXT:    [[RET:%.*]] = or disjoint <2 x i32> [[AND]], [[AND1]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/max-of-nots.ll b/llvm/test/Transforms/InstCombine/max-of-nots.ll
index acce89b38be83d..3a1f872faab17b 100644
--- a/llvm/test/Transforms/InstCombine/max-of-nots.ll
+++ b/llvm/test/Transforms/InstCombine/max-of-nots.ll
@@ -4,7 +4,7 @@
 define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @umin_of_nots(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
-; CHECK-NEXT:    [[MIN:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[MIN]]
 ;
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -17,7 +17,7 @@ define <2 x i32> @umin_of_nots(<2 x i32> %x, <2 x i32> %y) {
 define <2 x i32> @smin_of_nots(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @smin_of_nots(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
-; CHECK-NEXT:    [[MIN:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[MIN:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[MIN]]
 ;
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -242,7 +242,7 @@ define <2 x i32> @max_of_nots_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @max_of_nots_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> zeroinitializer)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[X:%.*]])
-; CHECK-NEXT:    [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[SMAX96:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[SMAX96]]
 ;
   %c0 = icmp sgt <2 x i32> %y, zeroinitializer
@@ -324,7 +324,7 @@ define i32 @min_of_max_swap(i32 %a) {
 
 define <2 x i32> @max_of_min_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @max_of_min_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 -1)
 ;
   %not_a = xor <2 x i32> %a, <i32 -1, i32 -1>
   %c0 = icmp sgt <2 x i32> %a, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/maximum.ll b/llvm/test/Transforms/InstCombine/maximum.ll
index 7455d3b7b924cc..c79449472235b5 100644
--- a/llvm/test/Transforms/InstCombine/maximum.ll
+++ b/llvm/test/Transforms/InstCombine/maximum.ll
@@ -217,7 +217,7 @@ define float @maximum_f32_1_maximum_p0_val(float %x) {
 
 define <2 x float> @maximum_f32_1_maximum_val_p0_val_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @maximum_f32_1_maximum_val_p0_val_v2f32(
-; CHECK-NEXT:    [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT:    [[Z:%.*]] = call <2 x float> @llvm.maximum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[Z]]
 ;
   %y = call <2 x float> @llvm.maximum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
diff --git a/llvm/test/Transforms/InstCombine/maxnum.ll b/llvm/test/Transforms/InstCombine/maxnum.ll
index f26a5300febd85..67fb7a90b3d49e 100644
--- a/llvm/test/Transforms/InstCombine/maxnum.ll
+++ b/llvm/test/Transforms/InstCombine/maxnum.ll
@@ -217,7 +217,7 @@ define float @maxnum_f32_1_maxnum_p0_val(float %x) {
 
 define <2 x float> @maxnum_f32_1_maxnum_val_p0_val_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @maxnum_f32_1_maxnum_val_p0_val_v2f32(
-; CHECK-NEXT:    [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>)
+; CHECK-NEXT:    [[Z:%.*]] = call <2 x float> @llvm.maxnum.v2f32(<2 x float> [[X:%.*]], <2 x float> splat (float 1.000000e+00))
 ; CHECK-NEXT:    ret <2 x float> [[Z]]
 ;
   %y = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %x, <2 x float> zeroinitializer)
diff --git a/llvm/test/Transforms/InstCombine/merge-icmp.ll b/llvm/test/Transforms/InstCombine/merge-icmp.ll
index 4e8ef07dba04c8..4f658273ce0982 100644
--- a/llvm/test/Transforms/InstCombine/merge-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/merge-icmp.ll
@@ -38,7 +38,7 @@ define i1 @and_test1_logical(ptr %x) {
 define <2 x i1> @and_test1_vector(ptr %x) {
 ; CHECK-LABEL: @and_test1_vector(
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], <i16 17791, i16 17791>
+; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 17791)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %load = load <2 x i16>, ptr %x, align 4
@@ -83,7 +83,7 @@ define i1 @and_test2_logical(ptr %x) {
 define <2 x i1> @and_test2_vector(ptr %x) {
 ; CHECK-LABEL: @and_test2_vector(
 ; CHECK-NEXT:    [[LOAD:%.*]] = load <2 x i16>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], <i16 32581, i16 32581>
+; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i16> [[LOAD]], splat (i16 32581)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %load = load <2 x i16>, ptr %x, align 4
@@ -123,7 +123,7 @@ define i1 @or_basic_commuted(i16 %load) {
 
 define <2 x i1> @or_vector(<2 x i16> %load) {
 ; CHECK-LABEL: @or_vector(
-; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], <i16 17791, i16 17791>
+; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i16> [[LOAD:%.*]], splat (i16 17791)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %trunc = trunc <2 x i16> %load to <2 x i8>
diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll
index d2c2e9018792bd..db73974b7bf6ad 100644
--- a/llvm/test/Transforms/InstCombine/min-positive.ll
+++ b/llvm/test/Transforms/InstCombine/min-positive.ll
@@ -97,7 +97,7 @@ define i1 @maybe_not_positive(i32 %other) {
 
 define <2 x i1> @maybe_not_positive_vec(<2 x i32> %x, <2 x i32> %other) {
 ; CHECK-LABEL: @maybe_not_positive_vec(
-; CHECK-NEXT:    [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[NOTNEG:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[SEL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[NOTNEG]], <2 x i32> [[OTHER:%.*]])
 ; CHECK-NEXT:    [[TEST:%.*]] = icmp sgt <2 x i32> [[SEL]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[TEST]]
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 26cd4996e687d5..8810aba52e080c 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -226,7 +226,7 @@ define i32 @test68(i32 %x) {
 
 define <2 x i32> @test68vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test68vec(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 11, i32 11>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11))
 ; CHECK-NEXT:    ret <2 x i32> [[COND]]
 ;
   %cmp = icmp slt <2 x i32> <i32 11, i32 11>, %x
@@ -290,7 +290,7 @@ define i32 @test72(i32 %x) {
 
 define <2 x i32> @test72vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test72vec(
-; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 11, i32 11>)
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 11))
 ; CHECK-NEXT:    ret <2 x i32> [[RETVAL]]
 ;
   %cmp = icmp sgt <2 x i32> %x, <i32 92, i32 92>
@@ -711,7 +711,7 @@ define <2 x i8> @min_through_cast_vec1(<2 x i32> %x) {
 
 define <2 x i8> @min_through_cast_vec2(<2 x i32> %x) {
 ; CHECK-LABEL: @min_through_cast_vec2(
-; CHECK-NEXT:    [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 511, i32 511>)
+; CHECK-NEXT:    [[RES1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 511))
 ; CHECK-NEXT:    [[RES:%.*]] = trunc <2 x i32> [[RES1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[RES]]
 ;
@@ -965,8 +965,8 @@ define i32 @add_umin_extra_use(i32 %x, ptr %p) {
 
 define <2 x i16> @add_umin_vec(<2 x i16> %x) {
 ; CHECK-LABEL: @add_umin_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> <i16 225, i16 225>)
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], <i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225))
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i16> [[TMP1]], splat (i16 15)
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %a = add nuw <2 x i16> %x, <i16 15, i16 15>
@@ -1212,8 +1212,8 @@ define i32 @add_smin_extra_use(i32 %x, ptr %p) {
 
 define <2 x i16> @add_smin_vec(<2 x i16> %x) {
 ; CHECK-LABEL: @add_smin_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> <i16 225, i16 225>)
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i16> [[TMP1]], <i16 15, i16 15>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> splat (i16 225))
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i16> [[TMP1]], splat (i16 15)
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %a = add nsw <2 x i16> %x, <i16 15, i16 15>
@@ -1472,7 +1472,7 @@ define i32 @test_smin_umin2(i32 %x) {
 
 define <2 x i32> @test_smin_umin_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test_smin_umin_vec(
-; CHECK-NEXT:    [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 10, i32 10>)
+; CHECK-NEXT:    [[UMIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 10))
 ; CHECK-NEXT:    ret <2 x i32> [[UMIN]]
 ;
   %smin = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %x, <2 x i32> <i32 10, i32 20>)
diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
index a76f0f84ba3401..0b7127f82b6125 100644
--- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll
@@ -398,7 +398,7 @@ define i8 @smax_of_nots(i8 %x, i8 %y) {
 define <3 x i8> @smin_of_nots(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @smin_of_nots(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
-; CHECK-NEXT:    [[M:%.*]] = xor <3 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
   %notx = xor <3 x i8> %x, <i8 -1, i8 poison, i8 -1>
@@ -478,7 +478,7 @@ define i8 @smax_of_not_and_const(i8 %x) {
 define <3 x i8> @smin_of_not_and_const(<3 x i8> %x) {
 ; CHECK-LABEL: @smin_of_not_and_const(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -43, i8 poison, i8 -44>)
-; CHECK-NEXT:    [[M:%.*]] = xor <3 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[M:%.*]] = xor <3 x i8> [[TMP1]], splat (i8 -1)
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
   %notx = xor <3 x i8> %x, <i8 -1, i8 -1, i8 poison>
@@ -774,8 +774,8 @@ define i8 @clamp_two_vals_smax_smin(i8 %x) {
 
 define <3 x i8> @clamp_two_vals_smin_smax(<3 x i8> %x) {
 ; CHECK-LABEL: @clamp_two_vals_smin_smax(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], <i8 41, i8 41, i8 41>
-; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> <i8 42, i8 42, i8 42>, <3 x i8> <i8 41, i8 41, i8 41>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <3 x i8> [[X:%.*]], splat (i8 41)
+; CHECK-NEXT:    [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i8> splat (i8 42), <3 x i8> splat (i8 41)
 ; CHECK-NEXT:    ret <3 x i8> [[R]]
 ;
   %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %x, <3 x i8> <i8 42, i8 42, i8 42>)
@@ -1933,8 +1933,8 @@ define i8 @smax_offset_uses(i8 %x) {
 
 define <3 x i8> @smin_offset(<3 x i8> %x) {
 ; CHECK-LABEL: @smin_offset(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -127, i8 -127, i8 -127>)
-; CHECK-NEXT:    [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], <i8 124, i8 124, i8 124>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 -127))
+; CHECK-NEXT:    [[M:%.*]] = or disjoint <3 x i8> [[TMP1]], splat (i8 124)
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
   %a = add nsw nuw <3 x i8> %x, <i8 124, i8 124, i8 124>
@@ -1996,8 +1996,8 @@ define i8 @smin_offset_uses(i8 %x) {
 
 define <3 x i8> @umax_offset(<3 x i8> %x) {
 ; CHECK-LABEL: @umax_offset(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 3, i8 3, i8 3>)
-; CHECK-NEXT:    [[M:%.*]] = add nuw <3 x i8> [[TMP1]], <i8 127, i8 127, i8 127>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> splat (i8 3))
+; CHECK-NEXT:    [[M:%.*]] = add nuw <3 x i8> [[TMP1]], splat (i8 127)
 ; CHECK-NEXT:    ret <3 x i8> [[M]]
 ;
   %a = add nsw nuw <3 x i8> %x, <i8 127, i8 127, i8 127>
@@ -2509,8 +2509,8 @@ entry:
 define <3 x i8> @fold_umax_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) {
 ; CHECK-LABEL: @fold_umax_with_knownbits_info_poison_in_splat(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A1:%.*]] = or <3 x i8> [[A:%.*]], <i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[A2:%.*]] = shl <3 x i8> [[B:%.*]], <i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[A1:%.*]] = or <3 x i8> [[A:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[A2:%.*]] = shl <3 x i8> [[B:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <3 x i8> [[A1]], [[A2]]
 ; CHECK-NEXT:    ret <3 x i8> [[SUB]]
 ;
@@ -2538,7 +2538,7 @@ entry:
 define <3 x i8> @fold_umin_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) {
 ; CHECK-LABEL: @fold_umin_with_knownbits_info_poison_in_splat(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret <3 x i8> <i8 3, i8 3, i8 3>
+; CHECK-NEXT:    ret <3 x i8> splat (i8 3)
 ;
 entry:
   %a1 = or <3 x i8> %a, <i8 3, i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll
index 8b896632b8adcc..e688391f72e631 100644
--- a/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-of-xor-x.ll
@@ -15,7 +15,7 @@ declare void @barrier()
 
 define <2 x i8> @umax_xor_Cpow2(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_xor_Cpow2(
-; CHECK-NEXT:    [[R:%.*]] = or <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[R:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x_xor = xor <2 x i8> %x, <i8 128, i8 128>
@@ -45,7 +45,7 @@ define i8 @smax_xor_Cpow2_pos(i8 %x) {
 
 define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_xor_Cpow2_pos(
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X:%.*]], <i8 -17, i8 -17>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -17)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x_xor = xor <2 x i8> %x, <i8 16, i8 16>
@@ -55,7 +55,7 @@ define <2 x i8> @smin_xor_Cpow2_pos(<2 x i8> %x) {
 
 define <2 x i8> @smax_xor_Cpow2_neg(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_xor_Cpow2_neg(
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x_xor = xor <2 x i8> %x, <i8 128, i8 128>
@@ -91,7 +91,7 @@ define <2 x i8> @umin_xor_pow2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @umin_xor_pow2(
 ; CHECK-NEXT:    [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[Y:%.*]]
 ; CHECK-NEXT:    [[YP2:%.*]] = and <2 x i8> [[Y]], [[NY]]
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[YP2]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[YP2]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/modulo.ll b/llvm/test/Transforms/InstCombine/modulo.ll
index 2988c524faedca..7a9584d69d3bf1 100644
--- a/llvm/test/Transforms/InstCombine/modulo.ll
+++ b/llvm/test/Transforms/InstCombine/modulo.ll
@@ -16,7 +16,7 @@ define i32 @modulo2(i32 %x) {
 
 define <2 x i32> @modulo2_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo2_vec(
-; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
   %rem.i = srem <2 x i32> %x, <i32 2, i32 2>
@@ -43,9 +43,9 @@ define i32 @modulo3(i32 %x) {
 
 define <2 x i32> @modulo3_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo3_vec(
-; CHECK-NEXT:    [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer
-; CHECK-NEXT:    [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> <i32 3, i32 3>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 3), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
@@ -70,7 +70,7 @@ define i32 @modulo4(i32 %x) {
 
 define <2 x i32> @modulo4_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo4_vec(
-; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
   %rem.i = srem <2 x i32> %x, <i32 4, i32 4>
@@ -97,9 +97,9 @@ define i32 @modulo7(i32 %x) {
 
 define <2 x i32> @modulo7_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo7_vec(
-; CHECK-NEXT:    [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[REM_I:%.*]] = srem <2 x i32> [[X:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt <2 x i32> [[REM_I]], zeroinitializer
-; CHECK-NEXT:    [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> <i32 7, i32 7>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[ADD_I:%.*]] = select <2 x i1> [[CMP_I]], <2 x i32> splat (i32 7), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[RET_I:%.*]] = add nsw <2 x i32> [[ADD_I]], [[REM_I]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
@@ -124,7 +124,7 @@ define i32 @modulo32(i32 %x) {
 
 define <2 x i32> @modulo32_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @modulo32_vec(
-; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[RET_I:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[RET_I]]
 ;
   %rem.i = srem <2 x i32> %x, <i32 32, i32 32>
diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
index 94f41236c6f867..3a4c65d9b8f322 100644
--- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll
@@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) {
 
 define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @pow2_multiplier_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl <2 x i32> [[A:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %B = mul <2 x i32> %A, <i32 8, i32 8>
@@ -492,7 +492,7 @@ define i55 @neg_mul_constant_apint(i55 %A) {
 
 define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) {
 ; CHECK-LABEL: @neg_mul_constant_vec(
-; CHECK-NEXT:    [[B:%.*]] = mul <3 x i8> [[A:%.*]], <i8 -5, i8 -5, i8 -5>
+; CHECK-NEXT:    [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5)
 ; CHECK-NEXT:    ret <3 x i8> [[B]]
 ;
   %A = sub <3 x i8> zeroinitializer, %a
@@ -593,7 +593,7 @@ define i32 @test32(i32 %X) {
 
 define <2 x i32> @test32vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test32vec(
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %mul = mul nsw <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
@@ -611,7 +611,7 @@ define i32 @test33(i32 %X) {
 
 define <2 x i32> @test33vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test33vec(
-; CHECK-NEXT:    [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %mul = mul nsw <2 x i32> %X, <i32 1073741824, i32 1073741824>
@@ -767,7 +767,7 @@ define i32 @negate_if_false_nuw(i32 %x, i1 %cond) {
 
 define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_commute(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 42>, [[PX:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
@@ -810,8 +810,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) {
 
 define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_wrong_constant(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 42>, [[PX:%.*]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> <i8 -1, i8 0>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> <i8 -1, i8 0>, <2 x i8> splat (i8 1)
 ; CHECK-NEXT:    [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -956,7 +956,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) {
 define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @mulsub1_vec(
 ; CHECK-NEXT:    [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %sub = sub <2 x i32> %a1, %a0
@@ -999,8 +999,8 @@ define i32 @mulsub2(i32 %a0) {
 
 define <2 x i32> @mulsub2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @mulsub2_vec(
-; CHECK-NEXT:    [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], <i32 -64, i32 -64>
+; CHECK-NEXT:    [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %sub = sub <2 x i32> <i32 16, i32 16>, %a0
@@ -1043,8 +1043,8 @@ define i32 @muladd2(i32 %a0) {
 
 define <2 x i32> @muladd2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @muladd2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> <i32 -64, i32 -64>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %add = add <2 x i32> %a0, <i32 16, i32 16>
diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
index fd8ad88764f592..48d44c05f39dff 100644
--- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
+++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -103,8 +103,8 @@ define i8 @one_demanded_bit(i8 %x) {
 
 define <2 x i8> @one_demanded_bit_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @one_demanded_bit_splat(
-; CHECK-NEXT:    [[M:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[M]], <i8 32, i8 32>
+; CHECK-NEXT:    [[M:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5)
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[M]], splat (i8 32)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %m = mul <2 x i8> %x, <i8 160, i8 160> ; 0b1010_0000
@@ -134,7 +134,7 @@ define i33 @squared_one_demanded_low_bit(i33 %x) {
 
 define <2 x i8> @squared_one_demanded_low_bit_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @squared_one_demanded_low_bit_splat(
-; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[X:%.*]], <i8 -2, i8 -2>
+; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 -2)
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
   %mul = mul <2 x i8> %x, %x
@@ -154,8 +154,8 @@ define i33 @squared_demanded_2_low_bits(i33 %x) {
 
 define <2 x i8> @squared_demanded_2_low_bits_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @squared_demanded_2_low_bits_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], <i8 -4, i8 -4>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[AND:%.*]] = or disjoint <2 x i8> [[TMP1]], splat (i8 -4)
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
   %mul = mul <2 x i8> %x, %x
@@ -240,7 +240,7 @@ define i64 @scalar_mul_bit_x0_yC(i64 %x, i64 %y, i64 %c) {
 ; Vector tests
 define <2 x i64> @vector_mul_bit_x0_y0(<2 x i64> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @vector_mul_bit_x0_y0(
-; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], <i64 1, i64 1>
+; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[Y:%.*]], splat (i64 1)
 ; CHECK-NEXT:    [[MUL:%.*]] = and <2 x i64> [[X:%.*]], [[AND2]]
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 8c528e340bc6ce..ec77f2e20e0c03 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -14,7 +14,7 @@ define i32 @pow2_multiplier(i32 %A) {
 
 define <2 x i32> @pow2_multiplier_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @pow2_multiplier_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl <2 x i32> [[A:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[B:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %B = mul <2 x i32> %A, <i32 8, i32 8>
@@ -989,7 +989,7 @@ define i32 @PR57278_or_disjoint_nsw(i32 %a) {
 define <2 x i32> @PR57278_shl_vec(<2 x i32> %v1) {
 ; CHECK-LABEL: @PR57278_shl_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw <2 x i32> [[V1:%.*]], <i32 12, i32 24>
-; CHECK-NEXT:    [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], <i32 9, i32 9>
+; CHECK-NEXT:    [[MUL:%.*]] = add nuw <2 x i32> [[TMP1]], splat (i32 9)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %shl = shl nuw <2 x i32> %v1, <i32 2, i32 3>
@@ -1105,7 +1105,7 @@ define i55 @neg_mul_constant_apint(i55 %A) {
 
 define <3 x i8> @neg_mul_constant_vec(<3 x i8> %a) {
 ; CHECK-LABEL: @neg_mul_constant_vec(
-; CHECK-NEXT:    [[B:%.*]] = mul <3 x i8> [[A:%.*]], <i8 -5, i8 -5, i8 -5>
+; CHECK-NEXT:    [[B:%.*]] = mul <3 x i8> [[A:%.*]], splat (i8 -5)
 ; CHECK-NEXT:    ret <3 x i8> [[B]]
 ;
   %A = sub <3 x i8> zeroinitializer, %a
@@ -1175,7 +1175,7 @@ define i32 @test32(i32 %X) {
 
 define <2 x i32> @test32vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test32vec(
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %mul = mul nsw <2 x i32> %X, <i32 -2147483648, i32 -2147483648>
@@ -1193,7 +1193,7 @@ define i32 @test33(i32 %X) {
 
 define <2 x i32> @test33vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test33vec(
-; CHECK-NEXT:    [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[MUL:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %mul = mul nsw <2 x i32> %X, <i32 1073741824, i32 1073741824>
@@ -1479,7 +1479,7 @@ define i32 @negate_if_false(i32 %x, i1 %cond) {
 
 define <2 x i8> @negate_if_true_commute(<2 x i8> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_commute(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 42>, [[PX:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <2 x i8> zeroinitializer, [[X]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[COND:%.*]], <2 x i8> [[TMP1]], <2 x i8> [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
@@ -1522,8 +1522,8 @@ define i32 @negate_if_true_extra_use(i32 %x, i1 %cond) {
 
 define <2 x i8> @negate_if_true_wrong_constant(<2 x i8> %px, i1 %cond) {
 ; CHECK-LABEL: @negate_if_true_wrong_constant(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> <i8 42, i8 42>, [[PX:%.*]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> <i8 -1, i8 0>, <2 x i8> <i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = sdiv <2 x i8> splat (i8 42), [[PX:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <2 x i8> <i8 -1, i8 0>, <2 x i8> splat (i8 1)
 ; CHECK-NEXT:    [[R:%.*]] = mul <2 x i8> [[X]], [[SEL]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1735,7 +1735,7 @@ define i32 @mulsub1(i32 %a0, i32 %a1) {
 define <2 x i32> @mulsub1_vec(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @mulsub1_vec(
 ; CHECK-NEXT:    [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]]
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %sub = sub <2 x i32> %a1, %a0
@@ -1778,8 +1778,8 @@ define i32 @mulsub2(i32 %a0) {
 
 define <2 x i32> @mulsub2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @mulsub2_vec(
-; CHECK-NEXT:    [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], <i32 -64, i32 -64>
+; CHECK-NEXT:    [[SUB_NEG:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MUL:%.*]] = add <2 x i32> [[SUB_NEG]], splat (i32 -64)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %sub = sub <2 x i32> <i32 16, i32 16>, %a0
@@ -1846,8 +1846,8 @@ define i32 @muladd2(i32 %a0) {
 
 define <2 x i32> @muladd2_vec(<2 x i32> %a0) {
 ; CHECK-LABEL: @muladd2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> <i32 -64, i32 -64>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A0:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[MUL:%.*]] = sub <2 x i32> splat (i32 -64), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %add = add <2 x i32> %a0, <i32 16, i32 16>
@@ -2015,7 +2015,7 @@ define <2 x i16> @sext_negpow2_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @sext_negpow2_vec(
 ; CHECK-NEXT:    [[X_NEG:%.*]] = sub <2 x i8> zeroinitializer, [[X:%.*]]
 ; CHECK-NEXT:    [[X_NEG_Z:%.*]] = zext <2 x i8> [[X_NEG]] to <2 x i16>
-; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], <i16 8, i16 8>
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i16> [[X_NEG_Z]], splat (i16 8)
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %sx = sext <2 x i8> %x to <2 x i16>
diff --git a/llvm/test/Transforms/InstCombine/mul_fold.ll b/llvm/test/Transforms/InstCombine/mul_fold.ll
index e4a21db8a6ece7..ced30b6efa2046 100644
--- a/llvm/test/Transforms/InstCombine/mul_fold.ll
+++ b/llvm/test/Transforms/InstCombine/mul_fold.ll
@@ -542,8 +542,8 @@ define <2 x i8> @mul_v2i8_low(<2 x i8> %in0, <2 x i8> %in1) {
 
 define <2 x i8> @mul_v2i8_low_one_extra_user(<2 x i8> %in0, <2 x i8> %in1) {
 ; CHECK-LABEL: @mul_v2i8_low_one_extra_user(
-; CHECK-NEXT:    [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], <i8 15, i8 15>
+; CHECK-NEXT:    [[IN0HI:%.*]] = lshr <2 x i8> [[IN0:%.*]], splat (i8 4)
+; CHECK-NEXT:    [[IN1LO:%.*]] = and <2 x i8> [[IN1:%.*]], splat (i8 15)
 ; CHECK-NEXT:    [[M01:%.*]] = mul nuw <2 x i8> [[IN1LO]], [[IN0HI]]
 ; CHECK-NEXT:    call void @use_v2i8(<2 x i8> [[M01]])
 ; CHECK-NEXT:    [[RETLO:%.*]] = mul <2 x i8> [[IN0]], [[IN1]]
diff --git a/llvm/test/Transforms/InstCombine/narrow-math.ll b/llvm/test/Transforms/InstCombine/narrow-math.ll
index 0fcded7b0220a1..14527e5b760d7e 100644
--- a/llvm/test/Transforms/InstCombine/narrow-math.ll
+++ b/llvm/test/Transforms/InstCombine/narrow-math.ll
@@ -214,8 +214,8 @@ define i64 @sext_add_constant_extra_use(i32 %V) {
 
 define <2 x i64> @test5_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test5_splat(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1073741823, i32 1073741823>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 1073741823)
 ; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
 ;
@@ -227,7 +227,7 @@ define <2 x i64> @test5_splat(<2 x i32> %V) {
 
 define <2 x i64> @test5_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test5_vec(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 1, i32 2>
 ; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
@@ -253,8 +253,8 @@ define i64 @test6(i32 %V) {
 
 define <2 x i64> @test6_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test6_splat(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1073741824, i32 -1073741824>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], splat (i32 -1073741824)
 ; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
 ;
@@ -266,7 +266,7 @@ define <2 x i64> @test6_splat(<2 x i32> %V) {
 
 define <2 x i64> @test6_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test6_vec(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 -2>
 ; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
@@ -279,7 +279,7 @@ define <2 x i64> @test6_vec(<2 x i32> %V) {
 
 define <2 x i64> @test6_vec2(<2 x i32> %V) {
 ; CHECK-LABEL: @test6_vec2(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[NARROW:%.*]] = add nsw <2 x i32> [[ASHR]], <i32 -1, i32 1>
 ; CHECK-NEXT:    [[ADD:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
@@ -305,8 +305,8 @@ define i64 @test7(i32 %V) {
 
 define <2 x i64> @test7_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test7_splat(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
 ;
@@ -318,7 +318,7 @@ define <2 x i64> @test7_splat(<2 x i32> %V) {
 
 define <2 x i64> @test7_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test7_vec(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[NARROW:%.*]] = add nuw <2 x i32> [[LSHR]], <i32 1, i32 2>
 ; CHECK-NEXT:    [[ADD:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[ADD]]
@@ -344,8 +344,8 @@ define i64 @test8(i32 %V) {
 
 define <2 x i64> @test8_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_splat(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 32767>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 32767)
 ; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
@@ -357,7 +357,7 @@ define <2 x i64> @test8_splat(<2 x i32> %V) {
 
 define <2 x i64> @test8_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_vec(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 16384>
 ; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
@@ -370,7 +370,7 @@ define <2 x i64> @test8_vec(<2 x i32> %V) {
 
 define <2 x i64> @test8_vec2(<2 x i32> %V) {
 ; CHECK-LABEL: @test8_vec2(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 32767, i32 -32767>
 ; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
@@ -396,8 +396,8 @@ define i64 @test9(i32 %V) {
 
 define <2 x i64> @test9_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test9_splat(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -32767>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], splat (i32 -32767)
 ; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
@@ -409,7 +409,7 @@ define <2 x i64> @test9_splat(<2 x i32> %V) {
 
 define <2 x i64> @test9_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test9_vec(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[NARROW:%.*]] = mul nsw <2 x i32> [[ASHR]], <i32 -32767, i32 -10>
 ; CHECK-NEXT:    [[MUL:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
@@ -435,8 +435,8 @@ define i64 @test10(i32 %V) {
 
 define <2 x i64> @test10_splat(<2 x i32> %V) {
 ; CHECK-LABEL: @test10_splat(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], splat (i32 65535)
 ; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
 ;
@@ -448,7 +448,7 @@ define <2 x i64> @test10_splat(<2 x i32> %V) {
 
 define <2 x i64> @test10_vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test10_vec(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 16, i32 16>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[NARROW:%.*]] = mul nuw <2 x i32> [[LSHR]], <i32 65535, i32 2>
 ; CHECK-NEXT:    [[MUL:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[MUL]]
@@ -538,8 +538,8 @@ define i64 @test15(i32 %V) {
 
 define <2 x i64> @test15vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test15vec(
-; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NARROW:%.*]] = sub nsw <2 x i32> <i32 8, i32 8>, [[ASHR]]
+; CHECK-NEXT:    [[ASHR:%.*]] = ashr <2 x i32> [[V:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[NARROW:%.*]] = sub nsw <2 x i32> splat (i32 8), [[ASHR]]
 ; CHECK-NEXT:    [[SUB:%.*]] = sext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
@@ -564,8 +564,8 @@ define i64 @test16(i32 %V) {
 
 define <2 x i64> @test16vec(<2 x i32> %V) {
 ; CHECK-LABEL: @test16vec(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NARROW:%.*]] = sub nuw <2 x i32> <i32 -2, i32 -2>, [[LSHR]]
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i32> [[V:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[NARROW:%.*]] = sub nuw <2 x i32> splat (i32 -2), [[LSHR]]
 ; CHECK-NEXT:    [[SUB:%.*]] = zext <2 x i32> [[NARROW]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SUB]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/narrow.ll b/llvm/test/Transforms/InstCombine/narrow.ll
index 40229f8511f76d..5fad7f07090b9f 100644
--- a/llvm/test/Transforms/InstCombine/narrow.ll
+++ b/llvm/test/Transforms/InstCombine/narrow.ll
@@ -37,7 +37,7 @@ define i32 @shrink_xor(i64 %a) {
 define <2 x i32> @shrink_xor_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @shrink_xor_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TRUNC:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %xor = xor <2 x i64> %a, <i64 2, i64 2>
diff --git a/llvm/test/Transforms/InstCombine/negated-bitmask.ll b/llvm/test/Transforms/InstCombine/negated-bitmask.ll
index 91886781863476..95b03626df534a 100644
--- a/llvm/test/Transforms/InstCombine/negated-bitmask.ll
+++ b/llvm/test/Transforms/InstCombine/negated-bitmask.ll
@@ -31,8 +31,8 @@ define i8 @sub_mask1_lshr(i8 %a0) {
 
 define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) {
 ; CHECK-LABEL: @neg_mask1_lshr_vector_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], <i32 28, i32 28, i32 28, i32 28>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], splat (i32 28)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %shift = lshr <4 x i32> %a0, <i32 3, i32 3, i32 3, i32 3>
@@ -44,7 +44,7 @@ define <4 x i32> @neg_mask1_lshr_vector_uniform(<4 x i32> %a0) {
 define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) {
 ; CHECK-LABEL: @neg_mask1_lshr_vector_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], <i32 28, i32 27, i32 26, i32 25>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %shift = lshr <4 x i32> %a0, <i32 3, i32 4, i32 5, i32 6>
@@ -56,7 +56,7 @@ define <4 x i32> @neg_mask1_lshr_vector_nonuniform(<4 x i32> %a0) {
 define <4 x i32> @sub_mask1_lshr_vector_nonuniform(<4 x i32> %a0) {
 ; CHECK-LABEL: @sub_mask1_lshr_vector_nonuniform(
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[A0:%.*]], <i32 28, i32 27, i32 26, i32 25>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    [[NEG:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 5, i32 0, i32 -1, i32 65556>
 ; CHECK-NEXT:    ret <4 x i32> [[NEG]]
 ;
@@ -142,7 +142,7 @@ define i8 @neg_mask2_lshr_outofbounds(i8 %a0) {
 define <2 x i32> @neg_mask1_lshr_vector_var(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @neg_mask1_lshr_vector_var(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], [[A1:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHIFT]], <i32 1, i32 1>
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1)
 ; CHECK-NEXT:    [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]]
 ; CHECK-NEXT:    ret <2 x i32> [[NEG]]
 ;
@@ -171,8 +171,8 @@ define i8 @neg_mask1_lshr_extrause_mask(i8 %a0) {
 ; Extra Use - shift
 define <2 x i32> @neg_mask1_lshr_extrause_lshr(<2 x i32> %a0) {
 ; CHECK-LABEL: @neg_mask1_lshr_extrause_lshr(
-; CHECK-NEXT:    [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHIFT]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SHIFT:%.*]] = lshr <2 x i32> [[A0:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHIFT]], splat (i32 1)
 ; CHECK-NEXT:    [[NEG:%.*]] = sub nsw <2 x i32> zeroinitializer, [[MASK]]
 ; CHECK-NEXT:    call void @usev2i32(<2 x i32> [[SHIFT]])
 ; CHECK-NEXT:    ret <2 x i32> [[NEG]]
@@ -200,7 +200,7 @@ define <2 x i64> @neg_signbit_use1(<2 x i32> %x) {
 ; CHECK-LABEL: @neg_signbit_use1(
 ; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 poison>
 ; CHECK-NEXT:    call void @usev2i32(<2 x i32> [[S]])
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/nested-select.ll b/llvm/test/Transforms/InstCombine/nested-select.ll
index d01dcf0793ade2..b8a7d4e8ab7894 100644
--- a/llvm/test/Transforms/InstCombine/nested-select.ll
+++ b/llvm/test/Transforms/InstCombine/nested-select.ll
@@ -515,7 +515,7 @@ define i8 @test_implied_true(i8 %x) {
 define <2 x i8> @test_implied_true_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @test_implied_true_vec(
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> <i8 20, i8 20>
+; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 20)
 ; CHECK-NEXT:    ret <2 x i8> [[SEL2]]
 ;
   %cmp1 = icmp slt <2 x i8> %x, <i8 10, i8 10>
@@ -570,10 +570,10 @@ define i8 @test_imply_fail(i8 %x) {
 
 define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) {
 ; CHECK-LABEL: @test_imply_type_mismatch(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 10, i8 10>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 10)
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i8 [[Y:%.*]], 0
-; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> <i8 5, i8 5>
-; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> <i8 20, i8 20>
+; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 5)
+; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], <2 x i8> [[SEL1]], <2 x i8> splat (i8 20)
 ; CHECK-NEXT:    ret <2 x i8> [[SEL2]]
 ;
   %cmp1 = icmp slt <2 x i8> %x, <i8 10, i8 10>
@@ -584,6 +584,12 @@ define <2 x i8> @test_imply_type_mismatch(<2 x i8> %x, i8 %y) {
 }
 
 define <4 x i1> @test_dont_crash(i1 %cond, <4 x i1> %a, <4 x i1> %b) {
+; CHECK-LABEL: @test_dont_crash(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND:%.*]], <4 x i1> [[A:%.*]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i1> [[SEL]], [[B:%.*]]
+; CHECK-NEXT:    ret <4 x i1> [[AND]]
+;
 entry:
   %sel = select i1 %cond, <4 x i1> %a, <4 x i1> zeroinitializer
   %and = and <4 x i1> %sel, %b
diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll
index 3679976d9dc393..a692d11caf8eae 100644
--- a/llvm/test/Transforms/InstCombine/not.ll
+++ b/llvm/test/Transforms/InstCombine/not.ll
@@ -73,7 +73,7 @@ define i1 @not_cmp_constant(i32 %a) {
 
 define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) {
 ; CHECK-LABEL: @not_cmp_constant_vector(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 -43, i32 -43>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], splat (i32 -43)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %nota = xor <2 x i32> %a, <i32 -1, i32 -1>
@@ -114,7 +114,7 @@ define i8 @not_ashr_const(i8 %x) {
 
 define <2 x i8> @not_ashr_const_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @not_ashr_const_splat(
-; CHECK-NEXT:    [[NOT:%.*]] = lshr <2 x i8> <i8 41, i8 41>, [[X:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = lshr <2 x i8> splat (i8 41), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[NOT]]
 ;
   %shr = ashr <2 x i8> <i8 -42, i8 -42>, %x
@@ -147,7 +147,7 @@ define i8 @not_lshr_const(i8 %x) {
 
 define <2 x i8> @not_lshr_const_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @not_lshr_const_splat(
-; CHECK-NEXT:    [[NOT:%.*]] = ashr <2 x i8> <i8 -43, i8 -43>, [[X:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = ashr <2 x i8> splat (i8 -43), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[NOT]]
 ;
   %shr = lshr <2 x i8> <i8 42, i8 42>, %x
@@ -180,7 +180,7 @@ define i32 @not_sub_extra_use(i32 %y, ptr %p) {
 
 define <2 x i32> @not_sub_splat(<2 x i32> %y) {
 ; CHECK-LABEL: @not_sub_splat(
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -124, i32 -124>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -124)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %s = sub <2 x i32> <i32 123, i32 123>, %y
@@ -190,9 +190,9 @@ define <2 x i32> @not_sub_splat(<2 x i32> %y) {
 
 define <2 x i32> @not_sub_extra_use_splat(<2 x i32> %y, ptr %p) {
 ; CHECK-LABEL: @not_sub_extra_use_splat(
-; CHECK-NEXT:    [[S:%.*]] = sub <2 x i32> <i32 123, i32 123>, [[Y:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = sub <2 x i32> splat (i32 123), [[Y:%.*]]
 ; CHECK-NEXT:    store <2 x i32> [[S]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[Y]], <i32 -124, i32 -124>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[Y]], splat (i32 -124)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %s = sub <2 x i32> <i32 123, i32 123>, %y
@@ -238,7 +238,7 @@ define i32 @not_add(i32 %x) {
 
 define <2 x i32> @not_add_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @not_add_splat(
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i32> <i32 -124, i32 -124>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i32> splat (i32 -124), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %a = add <2 x i32> %x, <i32 123, i32 123>
diff --git a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
index 8a508af632962a..787f8c7fd91701 100644
--- a/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/nsw-inseltpoison.ll
@@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17)
 ; CHECK-NEXT:    ret <3 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
@@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 0>
-; CHECK-NEXT:    [[T3:%.*]] = shl <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17)
 ; CHECK-NEXT:    ret <3 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/nsw.ll b/llvm/test/Transforms/InstCombine/nsw.ll
index 6ced39a88c0a6a..329a47324f8623 100644
--- a/llvm/test/Transforms/InstCombine/nsw.ll
+++ b/llvm/test/Transforms/InstCombine/nsw.ll
@@ -103,7 +103,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_shuffle_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 0, i32 1>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <3 x i32> [[SHUF]], splat (i32 17)
 ; CHECK-NEXT:    ret <3 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
@@ -119,7 +119,7 @@ define <3 x i32> @shl_nuw_nsw_shuffle_undef_elt_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_shuffle_undef_elt_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[T2]], <2 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 0>
-; CHECK-NEXT:    [[T3:%.*]] = shl <3 x i32> [[SHUF]], <i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[T3:%.*]] = shl <3 x i32> [[SHUF]], splat (i32 17)
 ; CHECK-NEXT:    ret <3 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
index 3fd4a17d972af4..cd795aeeb212e3 100644
--- a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
+++ b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll
@@ -72,7 +72,7 @@ define i1 @p3_scalar_shifted2_urem_by_const(i32 %x, i32 %y) {
 
 define <4 x i1> @p4_vector_urem_by_const__splat(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p4_vector_urem_by_const__splat(
-; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[T2]]
 ;
@@ -111,7 +111,7 @@ define <4 x i1> @p6_vector_urem_by_const__nonsplat_poison0(<4 x i32> %x, <4 x i3
 
 define <4 x i1> @p7_vector_urem_by_const__nonsplat_poison2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_poison2(
-; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T0:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 poison, i32 0>
 ; CHECK-NEXT:    ret <4 x i1> [[T2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll
index d8ef66a4dd7818..2e57597455c2cd 100644
--- a/llvm/test/Transforms/InstCombine/onehot_merge.ll
+++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll
@@ -31,8 +31,8 @@ define i1 @and_consts_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @and_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @and_consts_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], <i32 12, i32 12>
-; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 12, i32 12>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12)
+; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i32> [[TMP1]], splat (i32 12)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %t1 = and <2 x i32> <i32 4, i32 4>, %k
@@ -84,8 +84,8 @@ define i1 @foo1_and_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @foo1_and_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_and_vector(
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
@@ -148,8 +148,8 @@ define i1 @foo1_and_commuted_logical(i32 %k, i32 %c1, i32 %c2) {
 define <2 x i1> @foo1_and_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_and_commuted_vector(
 ; CHECK-NEXT:    [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]]
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
@@ -196,8 +196,8 @@ define i1 @or_consts_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @or_consts_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @or_consts_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], <i32 12, i32 12>
-; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 12, i32 12>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[K:%.*]], splat (i32 12)
+; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP1]], splat (i32 12)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %t1 = and <2 x i32> <i32 4, i32 4>, %k
@@ -249,8 +249,8 @@ define i1 @foo1_or_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @foo1_or_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_or_vector(
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
@@ -313,8 +313,8 @@ define i1 @foo1_or_commuted_logical(i32 %k, i32 %c1, i32 %c2) {
 define <2 x i1> @foo1_or_commuted_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_or_commuted_vector(
 ; CHECK-NEXT:    [[K2:%.*]] = mul <2 x i32> [[K:%.*]], [[K]]
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K2]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
@@ -372,8 +372,8 @@ define i1 @foo1_and_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @foo1_and_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_and_signbit_lshr_vector(
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = lshr exact <2 x i32> <i32 -2147483648, i32 -2147483648>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp ne <2 x i32> [[TMP2]], [[TMP1]]
@@ -430,8 +430,8 @@ define i1 @foo1_or_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) {
 
 define <2 x i1> @foo1_or_signbit_lshr_vector(<2 x i32> %k, <2 x i32> %c1, <2 x i32> %c2) {
 ; CHECK-LABEL: @foo1_or_signbit_lshr_vector(
-; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[C1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = lshr exact <2 x i32> <i32 -2147483648, i32 -2147483648>, [[C2:%.*]]
+; CHECK-NEXT:    [[T:%.*]] = shl nuw <2 x i32> splat (i32 1), [[C1:%.*]]
+; CHECK-NEXT:    [[T4:%.*]] = lshr exact <2 x i32> splat (i32 -2147483648), [[C2:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[T]], [[T4]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[K:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[OR:%.*]] = icmp eq <2 x i32> [[TMP2]], [[TMP1]]
diff --git a/llvm/test/Transforms/InstCombine/operand-complexity.ll b/llvm/test/Transforms/InstCombine/operand-complexity.ll
index 541a15275b6170..491bf950bf435a 100644
--- a/llvm/test/Transforms/InstCombine/operand-complexity.ll
+++ b/llvm/test/Transforms/InstCombine/operand-complexity.ll
@@ -60,7 +60,7 @@ define i8 @not(i8 %x) {
 define <2 x i8> @not_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @not_vec(
 ; CHECK-NEXT:    [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42>
-; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i8> [[X]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/or-concat.ll b/llvm/test/Transforms/InstCombine/or-concat.ll
index dfc3f0631773a3..deeaea33af9f12 100644
--- a/llvm/test/Transforms/InstCombine/or-concat.ll
+++ b/llvm/test/Transforms/InstCombine/or-concat.ll
@@ -65,7 +65,7 @@ define i64 @concat_bswap32_unary_flip(i64 %a0) {
 
 define <2 x i64> @concat_bswap32_unary_flip_vector(<2 x i64> %a0) {
 ; CHECK-LABEL: @concat_bswap32_unary_flip_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32))
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
@@ -103,7 +103,7 @@ define <2 x i64> @concat_bswap32_binary_vector(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @concat_bswap32_binary_vector(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP4]])
 ; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
@@ -176,7 +176,7 @@ define i64 @concat_bitreverse32_unary_flip(i64 %a0) {
 
 define <2 x i64> @concat_bitreverse32_unary_flip_vector(<2 x i64> %a0) {
 ; CHECK-LABEL: @concat_bitreverse32_unary_flip_vector(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> <i64 32, i64 32>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> [[A0:%.*]], <2 x i64> [[A0]], <2 x i64> splat (i64 32))
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
 ;
@@ -214,7 +214,7 @@ define <2 x i64> @concat_bitreverse32_binary_vector(<2 x i32> %a0, <2 x i32> %a1
 ; CHECK-LABEL: @concat_bitreverse32_binary_vector(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i32> [[A1:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <2 x i32> [[A0:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i64> [[TMP2]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP4:%.*]] = or disjoint <2 x i64> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP4]])
 ; CHECK-NEXT:    ret <2 x i64> [[TMP5]]
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 9bcad034b363e7..69c27cd8908221 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -135,8 +135,8 @@ define i1 @test18_logical(i32 %A) {
 
 define <2 x i1> @test18vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test18vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], <i32 -100, i32 -100>
-; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -50, i32 -50>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[A:%.*]], splat (i32 -100)
+; CHECK-NEXT:    [[D:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -50)
 ; CHECK-NEXT:    ret <2 x i1> [[D]]
 ;
   %B = icmp sge <2 x i32> %A, <i32 100, i32 100>
@@ -196,8 +196,8 @@ define i16 @test23(i16 %A) {
 
 define <2 x i16> @test23vec(<2 x i16> %A) {
 ; CHECK-LABEL: @test23vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 1, i16 1>
-; CHECK-NEXT:    [[D:%.*]] = xor <2 x i16> [[B]], <i16 -24575, i16 -24575>
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 1)
+; CHECK-NEXT:    [[D:%.*]] = xor <2 x i16> [[B]], splat (i16 -24575)
 ; CHECK-NEXT:    ret <2 x i16> [[D]]
 ;
   %B = lshr <2 x i16> %A, <i16 1, i16 1>
@@ -400,8 +400,8 @@ define i32 @test30(i32 %A) {
 
 define <2 x i32> @test30vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test30vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 -58312, i32 -58312>
-; CHECK-NEXT:    [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], <i32 32962, i32 32962>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -58312)
+; CHECK-NEXT:    [[E:%.*]] = or disjoint <2 x i32> [[TMP1]], splat (i32 32962)
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
   %B = or <2 x i32> %A, <i32 32962, i32 32962>
@@ -430,8 +430,8 @@ define i64 @test31(i64 %A) {
 
 define <2 x i64> @test31vec(<2 x i64> %A) {
 ; CHECK-LABEL: @test31vec(
-; CHECK-NEXT:    [[E:%.*]] = and <2 x i64> [[A:%.*]], <i64 4294908984, i64 4294908984>
-; CHECK-NEXT:    [[F:%.*]] = or disjoint <2 x i64> [[E]], <i64 32962, i64 32962>
+; CHECK-NEXT:    [[E:%.*]] = and <2 x i64> [[A:%.*]], splat (i64 4294908984)
+; CHECK-NEXT:    [[F:%.*]] = or disjoint <2 x i64> [[E]], splat (i64 32962)
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
   %B = or <2 x i64> %A, <i64 194, i64 194>
@@ -555,8 +555,8 @@ define i1 @test37_logical(i32 %x) {
 
 define <2 x i1> @test37_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @test37_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 7, i32 7>
-; CHECK-NEXT:    [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT:    [[RET1:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i1> [[RET1]]
 ;
   %add1 = add <2 x i32> %x, <i32 7, i32 7>
@@ -861,9 +861,9 @@ define i1 @test46_logical(i8 signext %c)  {
 
 define <2 x i1> @test46_uniform(<2 x i8> %c)  {
 ; CHECK-LABEL: @test46_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], <i8 -33, i8 -33>
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 -65, i8 -65>
-; CHECK-NEXT:    [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], <i8 26, i8 26>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[C:%.*]], splat (i8 -33)
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -65)
+; CHECK-NEXT:    [[OR:%.*]] = icmp ult <2 x i8> [[TMP2]], splat (i8 26)
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
   %c.off = add <2 x i8> %c, <i8 -97, i8 -97>
@@ -940,9 +940,9 @@ define i1 @test47_logical(i8 signext %c)  {
 define <2 x i1> @test47_nonuniform(<2 x i8> %c)  {
 ; CHECK-LABEL: @test47_nonuniform(
 ; CHECK-NEXT:    [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], <i8 -65, i8 -97>
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], <i8 27, i8 27>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], splat (i8 27)
 ; CHECK-NEXT:    [[C_OFF17:%.*]] = add <2 x i8> [[C]], <i8 -97, i8 -65>
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], <i8 27, i8 27>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], splat (i8 27)
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
 ;
@@ -966,7 +966,7 @@ define i32 @test49(i1 %C) {
 
 define <2 x i32> @test49vec(i1 %C) {
 ; CHECK-LABEL: @test49vec(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 1019, i32 1019>, <2 x i32> <i32 123, i32 123>
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 1019), <2 x i32> splat (i32 123)
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -1013,7 +1013,7 @@ define <2 x i32> @test50vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1019, i32 1019>, [[ENTRY:%.*]] ], [ <i32 123, i32 123>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ splat (i32 1019), [[ENTRY:%.*]] ], [ splat (i32 123), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
 entry:
@@ -1404,7 +1404,7 @@ define i32 @test3(i32 %x, i32 %y) {
 define <2 x i32> @test4_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test4_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[OR1:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[OR1:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[OR1]]
 ;
   %or = or <2 x i32> %y, %x
@@ -1479,7 +1479,7 @@ define i8 @ashr_bitwidth_mask(i8 %x, i8 %y) {
 define <2 x i8> @ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) {
 ; CHECK-LABEL: @ashr_bitwidth_mask_vec_commute(
 ; CHECK-NEXT:    [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2>
-; CHECK-NEXT:    [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[R:%.*]] = or <2 x i8> [[Y]], [[SIGN]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/overflow-mul.ll b/llvm/test/Transforms/InstCombine/overflow-mul.ll
index 6b5a65c03ee102..1d18d9ffd46d26 100644
--- a/llvm/test/Transforms/InstCombine/overflow-mul.ll
+++ b/llvm/test/Transforms/InstCombine/overflow-mul.ll
@@ -216,7 +216,7 @@ define <4 x i32> @pr20113(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-NEXT:    [[VMOVL_I_I726:%.*]] = zext <4 x i16> [[A:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[VMOVL_I_I712:%.*]] = zext <4 x i16> [[B:%.*]] to <4 x i32>
 ; CHECK-NEXT:    [[MUL_I703:%.*]] = mul nuw <4 x i32> [[VMOVL_I_I712]], [[VMOVL_I_I726]]
-; CHECK-NEXT:    [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP:%.*]] = icmp sgt <4 x i32> [[MUL_I703]], splat (i32 -1)
 ; CHECK-NEXT:    [[VCGEZ_I:%.*]] = sext <4 x i1> [[TMP]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[VCGEZ_I]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
index 478e6bd5878d5a..ab2584add4da7e 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-a.ll
@@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>)
 
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64>
-; CHECK-NEXT:    [[T2:%.*]] = shl nuw <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = add <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
@@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
 ; CHECK-NEXT:    [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64>
-; CHECK-NEXT:    [[T2:%.*]] = shl nuw <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = add <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T2:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = add <8 x i64> [[T2]], splat (i64 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
index 469375633b60e1..3db87b00c1a901 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll
@@ -56,11 +56,11 @@ declare void @use8xi64(<8 x i64>)
 
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64>
-; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
@@ -68,7 +68,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T4]]
-; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T7:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T7]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
 ; CHECK-NEXT:    [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64>
-; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
index 1a711e58c333be..e16aac9b912b25 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll
@@ -49,14 +49,14 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
+; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33)
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
index cd0098ecdb0a6a..ebfa456cf0dd83 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll
@@ -53,16 +53,16 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33)
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X:%.*]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T3]]
-; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T6:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T6]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
@@ -85,7 +85,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat_poison(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 poison, i32 -33>
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
@@ -116,7 +116,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 poison, i32 65>
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
index 9a4a5dd890eec2..5dda49b280e191 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-e.ll
@@ -50,13 +50,13 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33>
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -33)
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <8 x i64> [[X]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i32> [[TMP1]], [[T2]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP2]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = zext <8 x i32> %nbits to <8 x i64>
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
index a9fafdbf7b8db7..4a0752f3d0e0ee 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-a.ll
@@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>)
 
 define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) {
 define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i32> splat (i32 1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[T1]], splat (i32 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
index bce2a1c3f7e505..a6c3f334172654 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll
@@ -46,16 +46,16 @@ declare void @use8xi32(<8 x i32>)
 
 define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T4]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T4]]
-; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T5]]
 ;
   %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
@@ -101,9 +101,9 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) {
 define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -33, i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i32> [[T1]], splat (i32 -1)
+; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
index 55d0b3f80a519b..cc2a33e43cbf1b 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll
@@ -38,12 +38,12 @@ declare void @use8xi32(<8 x i32>)
 
 define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1)
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %nbits
@@ -76,7 +76,7 @@ define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) {
 
 define <8 x i32> @t1_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 33>
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
index 7ad99a6bb0a38f..eff9bcb2ddb783 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll
@@ -42,14 +42,14 @@ declare void @use8xi32(<8 x i32>)
 
 define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS]]
-; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]]
+; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1)
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T3]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T3]]
-; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T4:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T4]]
 ;
   %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, %nbits
@@ -66,7 +66,7 @@ define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat_poison(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
@@ -88,8 +88,8 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) {
 
 define <8 x i32> @t2_vec_nonsplat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <8 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i32> splat (i32 -1), [[NBITS]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -31, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 33>
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
index 7be65330013129..66a46aeb908b54 100644
--- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-e.ll
@@ -39,11 +39,11 @@ declare void @use8xi32(<8 x i32>)
 define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <8 x i32> [[X:%.*]], [[NBITS:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -1)
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T2]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> [[X]], [[T2]]
-; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T3:%.*]] = and <8 x i32> [[TMP1]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <8 x i32> [[T3]]
 ;
   %t0 = shl <8 x i32> %x, %nbits
diff --git a/llvm/test/Transforms/InstCombine/pow-1.ll b/llvm/test/Transforms/InstCombine/pow-1.ll
index 91e909aa25c7e3..c23d15d16a34ba 100644
--- a/llvm/test/Transforms/InstCombine/pow-1.ll
+++ b/llvm/test/Transforms/InstCombine/pow-1.ll
@@ -97,7 +97,7 @@ define float @test_simplify1_noerrno(float %x) {
 define <2 x float> @test_simplify1v(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_simplify1v(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.0, float 1.0>, <2 x float> %x)
   ret <2 x float> %retval
@@ -134,7 +134,7 @@ define double @test_simplify2_noerrno(double %x) {
 define <2 x double> @test_simplify2v(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @test_simplify2v(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
 ;
   %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.0, double 1.0>, <2 x double> %x)
   ret <2 x double> %retval
@@ -253,22 +253,22 @@ define <2 x float> @test_simplify3v(<2 x float> %x) {
 ;
 ; VC32-LABEL: define <2 x float> @test_simplify3v(
 ; VC32-SAME: <2 x float> [[X:%.*]]) {
-; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[X]])
+; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]])
 ; VC32-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; VC19-LABEL: define <2 x float> @test_simplify3v(
 ; VC19-SAME: <2 x float> [[X:%.*]]) {
-; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[X]])
+; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]])
 ; VC19-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; VC64-LABEL: define <2 x float> @test_simplify3v(
 ; VC64-SAME: <2 x float> [[X:%.*]]) {
-; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[X]])
+; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]])
 ; VC64-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; NOLIB-LABEL: define <2 x float> @test_simplify3v(
 ; NOLIB-SAME: <2 x float> [[X:%.*]]) {
-; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[X]])
+; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 2.000000e+00), <2 x float> [[X]])
 ; NOLIB-NEXT:    ret <2 x float> [[RETVAL]]
 ;
   %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.0, float 2.0>, <2 x float> %x)
@@ -279,28 +279,28 @@ define <2 x float> @test_simplify3v(<2 x float> %x) {
 define <2 x double> @test_simplify3vn(<2 x double> %x) {
 ; ANY-LABEL: define <2 x double> @test_simplify3vn(
 ; ANY-SAME: <2 x double> [[X:%.*]]) {
-; ANY-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[X]], <double 2.000000e+00, double 2.000000e+00>
+; ANY-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[X]], splat (double 2.000000e+00)
 ; ANY-NEXT:    [[EXP2:%.*]] = call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]])
 ; ANY-NEXT:    ret <2 x double> [[EXP2]]
 ;
 ; VC32-LABEL: define <2 x double> @test_simplify3vn(
 ; VC32-SAME: <2 x double> [[X:%.*]]) {
-; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.000000e+00, double 4.000000e+00>, <2 x double> [[X]])
+; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]])
 ; VC32-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; VC19-LABEL: define <2 x double> @test_simplify3vn(
 ; VC19-SAME: <2 x double> [[X:%.*]]) {
-; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.000000e+00, double 4.000000e+00>, <2 x double> [[X]])
+; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]])
 ; VC19-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; VC64-LABEL: define <2 x double> @test_simplify3vn(
 ; VC64-SAME: <2 x double> [[X:%.*]]) {
-; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.000000e+00, double 4.000000e+00>, <2 x double> [[X]])
+; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]])
 ; VC64-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; NOLIB-LABEL: define <2 x double> @test_simplify3vn(
 ; NOLIB-SAME: <2 x double> [[X:%.*]]) {
-; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.000000e+00, double 4.000000e+00>, <2 x double> [[X]])
+; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 4.000000e+00), <2 x double> [[X]])
 ; NOLIB-NEXT:    ret <2 x double> [[RETVAL]]
 ;
   %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 4.0, double 4.0>, <2 x double> %x)
@@ -450,22 +450,22 @@ define <2 x double> @test_simplify4v(<2 x double> %x) {
 ;
 ; VC32-LABEL: define <2 x double> @test_simplify4v(
 ; VC32-SAME: <2 x double> [[X:%.*]]) {
-; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[X]])
+; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]])
 ; VC32-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; VC19-LABEL: define <2 x double> @test_simplify4v(
 ; VC19-SAME: <2 x double> [[X:%.*]]) {
-; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[X]])
+; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]])
 ; VC19-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; VC64-LABEL: define <2 x double> @test_simplify4v(
 ; VC64-SAME: <2 x double> [[X:%.*]]) {
-; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[X]])
+; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]])
 ; VC64-NEXT:    ret <2 x double> [[RETVAL]]
 ;
 ; NOLIB-LABEL: define <2 x double> @test_simplify4v(
 ; NOLIB-SAME: <2 x double> [[X:%.*]]) {
-; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[X]])
+; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 2.000000e+00), <2 x double> [[X]])
 ; NOLIB-NEXT:    ret <2 x double> [[RETVAL]]
 ;
   %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.0, double 2.0>, <2 x double> %x)
@@ -482,22 +482,22 @@ define <2 x float> @test_simplify4vn(<2 x float> %x) {
 ;
 ; VC32-LABEL: define <2 x float> @test_simplify4vn(
 ; VC32-SAME: <2 x float> [[X:%.*]]) {
-; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> [[X]])
+; VC32-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]])
 ; VC32-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; VC19-LABEL: define <2 x float> @test_simplify4vn(
 ; VC19-SAME: <2 x float> [[X:%.*]]) {
-; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> [[X]])
+; VC19-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]])
 ; VC19-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; VC64-LABEL: define <2 x float> @test_simplify4vn(
 ; VC64-SAME: <2 x float> [[X:%.*]]) {
-; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> [[X]])
+; VC64-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]])
 ; VC64-NEXT:    ret <2 x float> [[RETVAL]]
 ;
 ; NOLIB-LABEL: define <2 x float> @test_simplify4vn(
 ; NOLIB-SAME: <2 x float> [[X:%.*]]) {
-; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x float> [[X]])
+; NOLIB-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 5.000000e-01), <2 x float> [[X]])
 ; NOLIB-NEXT:    ret <2 x float> [[RETVAL]]
 ;
   %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 0.5, float 0.5>, <2 x float> %x)
@@ -573,7 +573,7 @@ define float @test_simplify5_noerrno(float %x) {
 define <2 x float> @test_simplify5v(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_simplify5v(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float 0.0, float 0.0>)
   ret <2 x float> %retval
@@ -610,7 +610,7 @@ define double @test_simplify6_noerrno(double %x) {
 define <2 x double> @test_simplify6v(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @test_simplify6v(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    ret <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double 1.000000e+00)
 ;
   %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 0.0, double 0.0>)
   ret <2 x double> %retval
@@ -1317,7 +1317,7 @@ define float @pow_neg1_strict_noerrno(float %x) {
 define <2 x float> @pow_neg1_strictv(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_neg1_strictv(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x float> [[RECIPROCAL]]
 ;
   %r = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> <float -1.0, float -1.0>)
@@ -1357,7 +1357,7 @@ define double @pow_neg1_double_fast_noerrno(double %x) {
 define <2 x double> @pow_neg1_double_fastv(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @pow_neg1_double_fastv(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[X]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[X]]
 ; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
 ;
   %r = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -1.0, double -1.0>)
@@ -1533,7 +1533,7 @@ define bfloat @test_pow_10_bf16(bfloat %x) {
 define <2 x half> @test_pow_10_v2f16(<2 x half> %x) {
 ; CHECK-LABEL: define <2 x half> @test_pow_10_v2f16(
 ; CHECK-SAME: <2 x half> [[X:%.*]]) {
-; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> <half 0xH4900, half 0xH4900>, <2 x half> [[X]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4900), <2 x half> [[X]])
 ; CHECK-NEXT:    ret <2 x half> [[RETVAL]]
 ;
   %retval = call <2 x half> @llvm.pow.v2f16(<2 x half> <half 10.0, half 10.0>, <2 x half> %x)
@@ -1543,7 +1543,7 @@ define <2 x half> @test_pow_10_v2f16(<2 x half> %x) {
 define <2 x float> @test_pow_10_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: define <2 x float> @test_pow_10_v2f32(
 ; CHECK-SAME: <2 x float> [[X:%.*]]) {
-; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 1.000000e+01, float 1.000000e+01>, <2 x float> [[X]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 1.000000e+01), <2 x float> [[X]])
 ; CHECK-NEXT:    ret <2 x float> [[RETVAL]]
 ;
   %retval = call <2 x float> @llvm.pow.v2f32(<2 x float> <float 10.0, float 10.0>, <2 x float> %x)
@@ -1553,7 +1553,7 @@ define <2 x float> @test_pow_10_v2f32(<2 x float> %x) {
 define <2 x double> @test_pow_10_v2f64(<2 x double> %x) {
 ; CHECK-LABEL: define <2 x double> @test_pow_10_v2f64(
 ; CHECK-SAME: <2 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 1.000000e+01, double 1.000000e+01>, <2 x double> [[X]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 1.000000e+01), <2 x double> [[X]])
 ; CHECK-NEXT:    ret <2 x double> [[RETVAL]]
 ;
   %retval = call <2 x double> @llvm.pow.v2f64(<2 x double> <double 10.0, double 10.0>, <2 x double> %x)
@@ -1563,7 +1563,7 @@ define <2 x double> @test_pow_10_v2f64(<2 x double> %x) {
 define <2 x bfloat> @test_pow_10_v2bf16(<2 x bfloat> %x) {
 ; CHECK-LABEL: define <2 x bfloat> @test_pow_10_v2bf16(
 ; CHECK-SAME: <2 x bfloat> [[X:%.*]]) {
-; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> <bfloat 0xR4120, bfloat 0xR4120>, <2 x bfloat> [[X]])
+; CHECK-NEXT:    [[RETVAL:%.*]] = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> splat (bfloat 0xR4120), <2 x bfloat> [[X]])
 ; CHECK-NEXT:    ret <2 x bfloat> [[RETVAL]]
 ;
   %retval = call <2 x bfloat> @llvm.pow.v2bf16(<2 x bfloat> <bfloat 10.0, bfloat 10.0>, <2 x bfloat> %x)
diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
index 9272c1c33a953c..7eef670ccea3dc 100644
--- a/llvm/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/pow-sqrt.ll
@@ -51,8 +51,8 @@ define <2 x double> @pow_intrinsic_half_approx(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_half_approx(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
 ; CHECK-NEXT:    [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
-; CHECK-NEXT:    [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
-; CHECK-NEXT:    [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>, <2 x double> [[ABS]]
+; CHECK-NEXT:    [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000)
+; CHECK-NEXT:    [[POW:%.*]] = select afn <2 x i1> [[ISINF]], <2 x double> splat (double 0x7FF0000000000000), <2 x double> [[ABS]]
 ; CHECK-NEXT:    ret <2 x double> [[POW]]
 ;
   %pow = call afn <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double 5.0e-01, double 5.0e-01>)
@@ -206,7 +206,7 @@ define float @pow_libcall_neghalf_afn(float %x) {
 
 define <2 x double> @pow_intrinsic_neghalf_no_FMF(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_no_FMF(
-; CHECK-NEXT:    [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> <double -5.000000e-01, double -5.000000e-01>)
+; CHECK-NEXT:    [[POW:%.*]] = call <2 x double> @llvm.pow.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double -5.000000e-01))
 ; CHECK-NEXT:    ret <2 x double> [[POW]]
 ;
   %pow = call <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>)
@@ -221,8 +221,8 @@ define <2 x double> @pow_intrinsic_neghalf_reassoc(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_reassoc(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
 ; CHECK-NEXT:    [[ABS:%.*]] = call reassoc <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
-; CHECK-NEXT:    [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
+; CHECK-NEXT:    [[ISINF:%.*]] = fcmp reassoc oeq <2 x double> [[X]], splat (double 0xFFF0000000000000)
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc <2 x double> splat (double 1.000000e+00), [[ABS]]
 ; CHECK-NEXT:    [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
 ;
@@ -238,8 +238,8 @@ define <2 x double> @pow_intrinsic_neghalf_afn(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_afn(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
 ; CHECK-NEXT:    [[ABS:%.*]] = call afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
-; CHECK-NEXT:    [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], <double 0xFFF0000000000000, double 0xFFF0000000000000>
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv afn <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
+; CHECK-NEXT:    [[ISINF:%.*]] = fcmp afn oeq <2 x double> [[X]], splat (double 0xFFF0000000000000)
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv afn <2 x double> splat (double 1.000000e+00), [[ABS]]
 ; CHECK-NEXT:    [[RECIPROCAL:%.*]] = select <2 x i1> [[ISINF]], <2 x double> zeroinitializer, <2 x double> [[TMP1]]
 ; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
 ;
@@ -264,7 +264,7 @@ define <2 x double> @pow_intrinsic_neghalf_ninf(<2 x double> %x) {
 ; CHECK-LABEL: @pow_intrinsic_neghalf_ninf(
 ; CHECK-NEXT:    [[SQRT:%.*]] = call ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> [[X:%.*]])
 ; CHECK-NEXT:    [[ABS:%.*]] = call ninf afn <2 x double> @llvm.fabs.v2f64(<2 x double> [[SQRT]])
-; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[ABS]]
+; CHECK-NEXT:    [[RECIPROCAL:%.*]] = fdiv ninf afn <2 x double> splat (double 1.000000e+00), [[ABS]]
 ; CHECK-NEXT:    ret <2 x double> [[RECIPROCAL]]
 ;
   %pow = call afn ninf <2 x double> @llvm.pow.v2f64(<2 x double> %x, <2 x double> <double -5.0e-01, double -5.0e-01>)
diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
index d31b7c9fe28359..0c4418a3094f96 100644
--- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
@@ -127,7 +127,7 @@ define half @pow_sitofp_f16_const_base_2(i32 %x) {
 define <2 x float> @pow_sitofp_v2f32_const_base_2(<2 x i32> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i32> %x to <2 x float>
@@ -139,20 +139,20 @@ define <2 x float> @pow_sitofp_v2f32_const_base_8(<2 x i32> %x) {
 ; LDEXP-EXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8(
 ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-EXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float>
-; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], <float 3.000000e+00, float 3.000000e+00>
+; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00)
 ; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]])
 ; LDEXP-EXP2-NEXT:    ret <2 x float> [[EXP2]]
 ;
 ; LDEXP-NOEXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8(
 ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> <float 8.000000e+00, float 8.000000e+00>, <2 x float> [[ITOFP]])
+; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> splat (float 8.000000e+00), <2 x float> [[ITOFP]])
 ; LDEXP-NOEXP2-NEXT:    ret <2 x float> [[POW]]
 ;
 ; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_8(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
 ; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float>
-; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], <float 3.000000e+00, float 3.000000e+00>
+; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x float> [[ITOFP]], splat (float 3.000000e+00)
 ; NOLDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.exp2.v2f32(<2 x float> [[MUL]])
 ; NOLDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
@@ -176,7 +176,7 @@ define <2 x float> @pow_sitofp_v2f32_const_base_mixed_2(<2 x i32> %x) {
 define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[X]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i32> %x to <2 x float>
@@ -198,7 +198,7 @@ define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(<vscale x 4 x i32>
 define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) {
 ; CHECK-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> <half 0xH3C00, half 0xH3C00>, <2 x i32> [[X]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> splat (half 0xH3C00), <2 x i32> [[X]])
 ; CHECK-NEXT:    ret <2 x half> [[EXP2]]
 ;
   %itofp = sitofp <2 x i32> %x to <2 x half>
@@ -209,7 +209,7 @@ define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) {
 define <2 x double> @pow_sitofp_v2f64_const_base_2(<2 x i32> %x) {
 ; CHECK-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x i32> [[X]])
+; CHECK-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> splat (double 1.000000e+00), <2 x i32> [[X]])
 ; CHECK-NEXT:    ret <2 x double> [[EXP2]]
 ;
   %itofp = sitofp <2 x i32> %x to <2 x double>
@@ -228,20 +228,20 @@ define <2 x half> @pow_sitofp_v2f16_const_base_8(<2 x i32> %x) {
 ; LDEXP-EXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8(
 ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-EXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half>
-; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], <half 0xH4200, half 0xH4200>
+; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200)
 ; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]])
 ; LDEXP-EXP2-NEXT:    ret <2 x half> [[EXP2]]
 ;
 ; LDEXP-NOEXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8(
 ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> <half 0xH4800, half 0xH4800>, <2 x half> [[ITOFP]])
+; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> splat (half 0xH4800), <2 x half> [[ITOFP]])
 ; LDEXP-NOEXP2-NEXT:    ret <2 x half> [[POW]]
 ;
 ; NOLDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_8(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
 ; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half>
-; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], <half 0xH4200, half 0xH4200>
+; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x half> [[ITOFP]], splat (half 0xH4200)
 ; NOLDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.exp2.v2f16(<2 x half> [[MUL]])
 ; NOLDEXP-NEXT:    ret <2 x half> [[EXP2]]
 ;
@@ -261,20 +261,20 @@ define <2 x double> @pow_sitofp_v2f64_const_base_8(<2 x i32> %x) {
 ; LDEXP-EXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8(
 ; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-EXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double>
-; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], <double 3.000000e+00, double 3.000000e+00>
+; LDEXP-EXP2-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00)
 ; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]])
 ; LDEXP-EXP2-NEXT:    ret <2 x double> [[EXP2]]
 ;
 ; LDEXP-NOEXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8(
 ; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
 ; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> <double 8.000000e+00, double 8.000000e+00>, <2 x double> [[ITOFP]])
+; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> splat (double 8.000000e+00), <2 x double> [[ITOFP]])
 ; LDEXP-NOEXP2-NEXT:    ret <2 x double> [[POW]]
 ;
 ; NOLDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_8(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
 ; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double>
-; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], <double 3.000000e+00, double 3.000000e+00>
+; NOLDEXP-NEXT:    [[MUL:%.*]] = fmul <2 x double> [[ITOFP]], splat (double 3.000000e+00)
 ; NOLDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.exp2.v2f64(<2 x double> [[MUL]])
 ; NOLDEXP-NEXT:    ret <2 x double> [[EXP2]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
index e5546075edc9bb..4a75a84d7b66de 100644
--- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll
+++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
@@ -531,7 +531,7 @@ define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> splat (float 1.000000e+00), <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %s = sitofp <2 x i8> %x to <2 x float>
diff --git a/llvm/test/Transforms/InstCombine/pr14365.ll b/llvm/test/Transforms/InstCombine/pr14365.ll
index 3a09b55aba309d..1a51c174733778 100644
--- a/llvm/test/Transforms/InstCombine/pr14365.ll
+++ b/llvm/test/Transforms/InstCombine/pr14365.ll
@@ -16,7 +16,7 @@ define i32 @test0(i32 %a0) {
 
 define <4 x i32> @test0_vec(<4 x i32> %a0) {
 ; CHECK-LABEL: @test0_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], <i32 -1431655766, i32 -1431655766, i32 -1431655766, i32 -1431655766>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 -1431655766)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = and <4 x i32> %a0, <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
@@ -44,8 +44,8 @@ define i32 @test1(i32 %a0) {
 
 define <4 x i32> @test1_vec(<4 x i32> %a0) {
 ; CHECK-LABEL: @test1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1431655765, i32 1431655765, i32 1431655765, i32 1431655765>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], splat (i32 1431655765)
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw <4 x i32> [[A0]], [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/pr17827.ll b/llvm/test/Transforms/InstCombine/pr17827.ll
index 6c6110aa073a59..2f10bb5c7f25f0 100644
--- a/llvm/test/Transforms/InstCombine/pr17827.ll
+++ b/llvm/test/Transforms/InstCombine/pr17827.ll
@@ -49,9 +49,9 @@ define i1 @test_shift_and_cmp_changed1(i8 %p, i8 %q) {
 
 define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) {
 ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec(
-; CHECK-NEXT:    [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[ANDP]], <i8 -64, i8 -64>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], <i8 32, i8 32>
+; CHECK-NEXT:    [[ANDP:%.*]] = shl <2 x i8> [[P:%.*]], splat (i8 5)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[ANDP]], splat (i8 -64)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i8> [[TMP1]], splat (i8 32)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %andp = and <2 x i8> %p, <i8 6, i8 6>
@@ -78,7 +78,7 @@ define i1 @test_shift_and_cmp_changed2(i8 %p) {
 
 define <2 x i1> @test_shift_and_cmp_changed2_vec(<2 x i8> %p) {
 ; CHECK-LABEL: @test_shift_and_cmp_changed2_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[P:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/pr53357.ll b/llvm/test/Transforms/InstCombine/pr53357.ll
index 0ae690869c1c44..2292abd0c90688 100644
--- a/llvm/test/Transforms/InstCombine/pr53357.ll
+++ b/llvm/test/Transforms/InstCombine/pr53357.ll
@@ -20,7 +20,7 @@ define i32 @src(i32 noundef %0, i32 noundef %1) {
 define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) {
 ; CHECK-LABEL: @src_vec(
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %3 = and <2 x i32> %1, %0
@@ -34,7 +34,7 @@ define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) {
 define <2 x i32> @src_vec_poison(<2 x i32> noundef %0, <2 x i32> noundef %1) {
 ; CHECK-LABEL: @src_vec_poison(
 ; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
 ;
   %3 = and <2 x i32> %1, %0
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 24777b1b7f2085..bcbd78e23ed67f 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -23,7 +23,7 @@ define ptr @ptrmask_combine_consecutive_preserve_attrs(ptr %p0, i64 %m1) {
 define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs(<2 x ptr> %p0, <2 x i64> %m1) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_consecutive_preserve_attrs_vecs
 ; CHECK-SAME: (<2 x ptr> [[P0:%.*]], <2 x i64> [[M1:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M1]], <i64 12345, i64 12345>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[M1]], splat (i64 12345)
 ; CHECK-NEXT:    [[R:%.*]] = call align 128 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
@@ -114,7 +114,7 @@ define ptr addrspace(1) @ptrmask_combine_add_alignment2(ptr addrspace(1) align 3
 define <2 x ptr> @ptrmask_combine_add_alignment_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_combine_add_alignment_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -96, i64 -96>)
+; CHECK-NEXT:    [[R:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -96))
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p, <2 x i64> <i64 -96, i64 -96>)
@@ -435,7 +435,7 @@ define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p)
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented
 ; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17
-; CHECK-NEXT:    [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> <i64 -96, i64 -96>)
+; CHECK-NEXT:    [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> splat (i64 -96))
 ; CHECK-NEXT:    ret <2 x ptr> [[PM]]
 ;
   %gep = getelementptr i8, <2 x ptr> %p, i32 17
@@ -520,7 +520,7 @@ define ptr @ptrmask_is_useless4(i64 %i, i64 %m) {
 define <2 x ptr> @ptrmask_is_useless_vec(<2 x i64> %i, <2 x i64> %m) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_is_useless_vec
 ; CHECK-SAME: (<2 x i64> [[I:%.*]], <2 x i64> [[M:%.*]]) {
-; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], <i64 31, i64 31>
+; CHECK-NEXT:    [[I0:%.*]] = and <2 x i64> [[I]], splat (i64 31)
 ; CHECK-NEXT:    [[P0:%.*]] = inttoptr <2 x i64> [[I0]] to <2 x ptr>
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P0]], <2 x i64> [[M]])
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
index e3bcfde4ad59ca..f505a0e99ec501 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-a.ll
@@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i64> [[T1]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1)
+; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[NBITS]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
@@ -127,9 +127,9 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) {
 define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i64> [[T1]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <8 x i64> splat (i64 1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i64> [[T1]], splat (i64 -1)
+; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[NBITS]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
index 8c61e24a97f1d0..c25b873b14a2ee 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll
@@ -57,9 +57,9 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i64> [[T1]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = xor <8 x i64> [[T1]], splat (i64 -1)
+; CHECK-NEXT:    [[T3:%.*]] = sub <8 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[X:%.*]], [[T2]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[NBITS]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
@@ -128,8 +128,8 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64>
-; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T1]]
-; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[T2:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T1]]
+; CHECK-NEXT:    [[T3:%.*]] = xor <8 x i64> [[T2]], splat (i64 -1)
 ; CHECK-NEXT:    [[T4:%.*]] = sub <8 x i32> <i32 33, i32 32, i32 33, i32 32, i32 32, i32 32, i32 32, i32 32>, [[NBITS]]
 ; CHECK-NEXT:    [[T5:%.*]] = and <8 x i64> [[X:%.*]], [[T3]]
 ; CHECK-NEXT:    call void @use8xi32(<8 x i32> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
index a78246781c7f9d..c82c4428c31fbe 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll
@@ -51,8 +51,8 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
+; CHECK-NEXT:    [[T1:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32)
 ; CHECK-NEXT:    [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
index b79ab790975270..325612f0a6d08a 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll
@@ -55,9 +55,9 @@ declare void @use8xi64(<8 x i64>)
 define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
+; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32)
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
@@ -89,7 +89,7 @@ define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat_poison(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 poison, i32 -32>
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
@@ -122,7 +122,7 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = lshr <8 x i64> splat (i64 -1), [[T0]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 poison, i32 64>
 ; CHECK-NEXT:    [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
index 097c5663c129c7..699e11dcf0358d 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-e.ll
@@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32)
 ; CHECK-NEXT:    [[T3:%.*]] = lshr exact <8 x i64> [[T1]], [[T0]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
index d8af0ca2199763..6163aa8c7ac0cf 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-f.ll
@@ -52,7 +52,7 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64>
 ; CHECK-NEXT:    [[T1:%.*]] = shl <8 x i64> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32>
+; CHECK-NEXT:    [[T2:%.*]] = add <8 x i32> [[NBITS]], splat (i32 -32)
 ; CHECK-NEXT:    [[T3:%.*]] = ashr exact <8 x i64> [[T1]], [[T0]]
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T0]])
 ; CHECK-NEXT:    call void @use8xi64(<8 x i64> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll
index a7fd2717e7197c..53ccc50d968a8c 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-a.ll
@@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>)
 
 define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_splat(
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <3 x i32> <i32 1, i32 1, i32 1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = add nsw <3 x i32> [[T1]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1)
 ; CHECK-NEXT:    [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> <i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[NBITS]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
@@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t4_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], <i32 -1, i32 0, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl nuw <3 x i32> <i32 1, i32 1, i32 1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add nsw <3 x i32> [[T1]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = shl nuw <3 x i32> splat (i32 1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = add nsw <3 x i32> [[T1]], splat (i32 -1)
 ; CHECK-NEXT:    [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]]
 ; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> <i32 33, i32 32, i32 32>, [[NBITS]]
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll
index e3c09813891163..98256ffbe53fb3 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll
@@ -100,10 +100,10 @@ declare void @use3xi32(<3 x i32>)
 
 define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_splat(
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = xor <3 x i32> [[T1]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1)
 ; CHECK-NEXT:    [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]]
-; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> <i32 32, i32 32, i32 32>, [[NBITS]]
+; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> splat (i32 32), [[NBITS]]
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[NBITS]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
@@ -129,8 +129,8 @@ define <3 x i32> @t3_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t4_vec_nonsplat(
 ; CHECK-NEXT:    [[T0:%.*]] = add <3 x i32> [[NBITS:%.*]], <i32 -1, i32 0, i32 1>
-; CHECK-NEXT:    [[T1:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = xor <3 x i32> [[T1]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[T0]]
+; CHECK-NEXT:    [[T2:%.*]] = xor <3 x i32> [[T1]], splat (i32 -1)
 ; CHECK-NEXT:    [[T3:%.*]] = and <3 x i32> [[X:%.*]], [[T2]]
 ; CHECK-NEXT:    [[T4:%.*]] = sub <3 x i32> <i32 33, i32 32, i32 32>, [[NBITS]]
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll
index 8428ef67d6b86b..f12e3e460860fb 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll
@@ -59,9 +59,9 @@ declare void @use3xi32(<3 x i32>)
 
 define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1)
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
@@ -80,7 +80,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 
 define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]]
 ; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 0, i32 2>
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
index 5d8ff9e9fb71bd..731de2b94cf269 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll
@@ -67,10 +67,10 @@ declare void @use3xi32(<3 x i32>)
 
 define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]]
 ; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]]
-; CHECK-NEXT:    [[T3:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T3:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1)
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
@@ -92,8 +92,8 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 
 define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t3_vec_nonsplat(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]]
 ; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 0, i32 2>
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
@@ -118,7 +118,7 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) {
 define <3 x i32> @t4_vec_poison(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t4_vec_poison(
 ; CHECK-NEXT:    [[T0:%.*]] = shl nsw <3 x i32> <i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS]]
+; CHECK-NEXT:    [[T1:%.*]] = lshr <3 x i32> splat (i32 -1), [[NBITS]]
 ; CHECK-NEXT:    [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]]
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll
index 74864e4473b114..1022a15fa04f56 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-e.ll
@@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = lshr exact <3 x i32> [[T0]], [[NBITS]]
-; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1)
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll
index 68626d5a2eabbe..4c03775ce75347 100644
--- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-f.ll
@@ -60,7 +60,7 @@ define <3 x i32> @t2_vec_splat(<3 x i32> %x, <3 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec_splat(
 ; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = ashr exact <3 x i32> [[T0]], [[NBITS]]
-; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[T2:%.*]] = add <3 x i32> [[NBITS]], splat (i32 1)
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T0]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T1]])
 ; CHECK-NEXT:    call void @use3xi32(<3 x i32> [[T2]])
diff --git a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll
index 45880ac5efe51e..ca65e07e2d853a 100644
--- a/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll
+++ b/llvm/test/Transforms/InstCombine/redundant-right-shift-input-masking.ll
@@ -39,7 +39,7 @@ define i32 @t1_sshr(i32 %data, i32 %nbits) {
 
 define <4 x i32> @t2_vec(<4 x i32> %data, <4 x i32> %nbits) {
 ; CHECK-LABEL: @t2_vec(
-; CHECK-NEXT:    [[T0:%.*]] = shl nsw <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl nsw <4 x i32> splat (i32 -1), [[NBITS:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = and <4 x i32> [[T0]], [[DATA:%.*]]
 ; CHECK-NEXT:    [[T2:%.*]] = lshr <4 x i32> [[T1]], [[NBITS]]
 ; CHECK-NEXT:    ret <4 x i32> [[T2]]
diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
index 45db2cf6758524..e7d6cc7102c713 100644
--- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
+++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
@@ -98,7 +98,7 @@ define i8 @urem_XY_XZ_with_CY_lt_CZ_with_shl(i8 %X) {
 
 define <2 x i8> @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out(<2 x i8> %X) {
 ; CHECK-LABEL: @urem_XY_XZ_with_CY_lt_CZ_with_nsw_out(
-; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], <i8 2, i8 2>
+; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i8> [[X:%.*]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %BO0 = shl nsw <2 x i8> %X, <i8 2, i8 2>
@@ -284,7 +284,7 @@ define i8 @srem_XY_XZ_with_CY_rem_CZ_eq_0_fail_missing_flag(i8 %X) {
 
 define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ(<2 x i8> %X) {
 ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ(
-; CHECK-NEXT:    [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %BO0 = shl <2 x i8> %X, <i8 3, i8 3>
@@ -306,7 +306,7 @@ define i8 @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out(i8 %X) {
 
 define <2 x i8> @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl(<2 x i8> %X) {
 ; CHECK-LABEL: @srem_XY_XZ_with_CY_lt_CZ_with_nuw_out_with_shl(
-; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i8> <i8 3, i8 3>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw <2 x i8> splat (i8 3), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %BO0 = shl nuw <2 x i8> <i8 3, i8 3>, %X
@@ -363,7 +363,7 @@ define i8 @srem_XY_XZ_with_CY_gt_CZ_with_nuw_out(i8 %X) {
 
 define <2 x i8> @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out(<2 x i8> %X) {
 ; CHECK-LABEL: @srem_XY_XZ_with_CY_gt_CZ_no_nuw_out(
-; CHECK-NEXT:    [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = shl nsw <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %BO0 = mul nsw <2 x i8> %X, <i8 10, i8 10>
diff --git a/llvm/test/Transforms/InstCombine/rem.ll b/llvm/test/Transforms/InstCombine/rem.ll
index 2cf56dfd50a876..d41d226746622f 100644
--- a/llvm/test/Transforms/InstCombine/rem.ll
+++ b/llvm/test/Transforms/InstCombine/rem.ll
@@ -79,7 +79,7 @@ define i8 @urem_with_sext_bool_divisor(i1 %x, i8 %y) {
 define <2 x i8> @urem_with_sext_bool_divisor_vec(<2 x i1> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @urem_with_sext_bool_divisor_vec(
 ; CHECK-NEXT:    [[Y_FROZEN:%.*]] = freeze <2 x i8> [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[Y_FROZEN]], splat (i8 -1)
 ; CHECK-NEXT:    [[REM:%.*]] = select <2 x i1> [[TMP1]], <2 x i8> zeroinitializer, <2 x i8> [[Y_FROZEN]]
 ; CHECK-NEXT:    ret <2 x i8> [[REM]]
 ;
@@ -198,7 +198,7 @@ define i32 @test3(i32 %A) {
 
 define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) {
 ; CHECK-LABEL: @vec_power_of_2_constant_splat_divisor(
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %B = urem <2 x i32> %A, <i32 8, i32 8>
@@ -227,7 +227,7 @@ define i1 @test3a(i32 %A) {
 
 define <2 x i1> @test3a_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test3a_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -669,8 +669,8 @@ define i32 @test22(i32 %A) {
 
 define <2 x i32> @test23(<2 x i32> %A) {
 ; CHECK-LABEL: @test23(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
-; CHECK-NEXT:    [[MUL:%.*]] = urem <2 x i32> [[AND]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
+; CHECK-NEXT:    [[MUL:%.*]] = urem <2 x i32> [[AND]], splat (i32 2147483647)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
@@ -691,7 +691,7 @@ define i1 @test24(i32 %A) {
 
 define <2 x i1> @test24_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test24_vec(
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[B]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -713,7 +713,7 @@ define i1 @test25(i32 %A) {
 
 define <2 x i1> @test25_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test25_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
@@ -1033,7 +1033,7 @@ define i32 @urem_select_of_constants_divisor(i1 %b, i32 %x) {
 define <2 x i32> @PR62401(<2 x i1> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @PR62401(
 ; CHECK-NEXT:    [[Y_FROZEN:%.*]] = freeze <2 x i32> [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[Y_FROZEN]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> zeroinitializer, <2 x i32> [[Y_FROZEN]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll
index 301ead708a08f7..d12cebe292e391 100644
--- a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll
@@ -63,8 +63,8 @@ define i32 @p3_sgt(i32 %x, i32 %y) {
 
 define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p4_vec_splat_ult_65536(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535)
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t = icmp ult <2 x i32> %x, <i32 65536, i32 65536>
@@ -73,8 +73,8 @@ define <2 x i32> @p4_vec_splat_ult_65536(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p5_vec_splat_ugt(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp ult <2 x i32> [[X:%.*]], splat (i32 65535)
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t = icmp ugt <2 x i32> %x, <i32 65534, i32 65534>
@@ -83,8 +83,8 @@ define <2 x i32> @p5_vec_splat_ugt(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p6_vec_splat_slt_65536(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 65535)
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t = icmp slt <2 x i32> %x, <i32 65536, i32 65536>
@@ -93,8 +93,8 @@ define <2 x i32> @p6_vec_splat_slt_65536(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p7_vec_splat_sgt(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 65535)
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t = icmp sgt <2 x i32> %x, <i32 65534, i32 65534>
@@ -106,8 +106,8 @@ define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p8_vec_nonsplat_poison0(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]]
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535)
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> splat (i32 65535), <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %t = icmp ult <2 x i32> %x, <i32 65536, i32 poison>
@@ -116,7 +116,7 @@ define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p9_vec_nonsplat_poison1(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 poison>, <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -126,7 +126,7 @@ define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) {
 }
 define <2 x i32> @p10_vec_nonsplat_poison2(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @p10_vec_nonsplat_poison2(
-; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 poison>, <2 x i32> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -349,9 +349,9 @@ define i32 @ult_inf_loop(i32 %x) {
 
 define <2 x i32> @ult_inf_loop_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @ult_inf_loop_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 38, i32 38>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 -4, i32 -4>
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 -5, i32 -5>, <2 x i32> <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 38)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], splat (i32 -4)
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 -5), <2 x i32> splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %add = add <2 x i32> %x, <i32 42, i32 42>
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index 35d15ea0bea238..9c1506b2e3cc8a 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -56,7 +56,7 @@ define i88 @rotl_i88_constant_commute(i88 %x) {
 
 define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @rotl_v2i16_constant_splat(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -67,7 +67,7 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) {
 
 define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) {
 ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 poison, i16 1>
@@ -78,7 +78,7 @@ define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) {
 
 define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) {
 ; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> splat (i16 1))
 ; CHECK-NEXT:    ret <2 x i16> [[R]]
 ;
   %shl = shl <2 x i16> %x, <i16 1, i16 1>
@@ -238,7 +238,7 @@ define i8 @rotr_i8_commute(i8 %x, i8 %y) {
 
 define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @rotl_v4i32(
-; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[Y:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> splat (i32 32), [[Y:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y]]
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <4 x i32> [[X]], [[SUB]]
 ; CHECK-NEXT:    [[R:%.*]] = or <4 x i32> [[SHL]], [[SHR]]
diff --git a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
index e4dd2d10637d3e..68a621946374e8 100644
--- a/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/sadd-with-overflow.ll
@@ -51,7 +51,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) {
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_constant(
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nsw <2 x i32> %x, <i32 12, i32 12>
@@ -62,7 +62,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @no_fold_splat_undef_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 12, i32 undef>
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nsw <2 x i32> %x, <i32 12, i32 undef>
@@ -73,7 +73,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @no_fold_splat_not_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nsw <2 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/sadd_sat.ll b/llvm/test/Transforms/InstCombine/sadd_sat.ll
index 1cce297122f8a1..d27e7aa28d62c9 100644
--- a/llvm/test/Transforms/InstCombine/sadd_sat.ll
+++ b/llvm/test/Transforms/InstCombine/sadd_sat.ll
@@ -405,8 +405,8 @@ define <4 x i32> @sadd_satv4i4(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @sadd_satv4i4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> <i32 -16, i32 -16, i32 -16, i32 -16>)
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15))
+; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16))
 ; CHECK-NEXT:    ret <4 x i32> [[SPEC_STORE_SELECT8]]
 ;
 entry:
@@ -422,8 +422,8 @@ define <4 x i32> @ssub_satv4i4(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @ssub_satv4i4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ADD:%.*]] = sub <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> <i32 -16, i32 -16, i32 -16, i32 -16>)
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[ADD]], <4 x i32> splat (i32 15))
+; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[SPEC_STORE_SELECT]], <4 x i32> splat (i32 -16))
 ; CHECK-NEXT:    ret <4 x i32> [[SPEC_STORE_SELECT8]]
 ;
 entry:
@@ -785,8 +785,8 @@ define <2 x i8> @ashrv2i8(<2 x i16> %a, <2 x i8> %b) {
 ; CHECK-NEXT:    [[CONV:%.*]] = ashr <2 x i16> [[A:%.*]], <i16 8, i16 12>
 ; CHECK-NEXT:    [[CONV1:%.*]] = sext <2 x i8> [[B:%.*]] to <2 x i16>
 ; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i16> [[CONV]], [[CONV1]]
-; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> <i16 -128, i16 -128>)
-; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> <i16 127, i16 127>)
+; CHECK-NEXT:    [[SPEC_STORE_SELECT:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[ADD]], <2 x i16> splat (i16 -128))
+; CHECK-NEXT:    [[SPEC_STORE_SELECT8:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[SPEC_STORE_SELECT]], <2 x i16> splat (i16 127))
 ; CHECK-NEXT:    [[CONV7:%.*]] = trunc nsw <2 x i16> [[SPEC_STORE_SELECT8]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[CONV7]]
 ;
@@ -805,7 +805,7 @@ entry:
 define <2 x i8> @ashrv2i8_s(<2 x i16> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @ashrv2i8_s(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 8, i16 8>
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i16> [[A:%.*]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc nuw <2 x i16> [[TMP0]] to <2 x i8>
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[TMP1]], <2 x i8> [[B:%.*]])
 ; CHECK-NEXT:    ret <2 x i8> [[TMP2]]
diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
index a88fd3cc21f1bc..c8cdb4059c3391 100644
--- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll
@@ -61,7 +61,7 @@ define i8 @test_scalar_uadd_combine(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_combine(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_combine(
-; CHECK-NEXT:    [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT:    [[X2:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30))
 ; CHECK-NEXT:    ret <2 x i8> [[X2]]
 ;
   %x1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
@@ -93,7 +93,7 @@ define i8 @test_scalar_uadd_overflow(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_overflow(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_overflow(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %y1 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 100, i8 100>)
   %y2 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %y1, <2 x i8> <i8 200, i8 200>)
@@ -113,7 +113,7 @@ define i8 @test_scalar_sadd_both_positive(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_both_positive(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_both_positive(
-; CHECK-NEXT:    [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT:    [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30))
 ; CHECK-NEXT:    ret <2 x i8> [[Z2]]
 ;
   %z1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
@@ -133,7 +133,7 @@ define i8 @test_scalar_sadd_both_negative(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_both_negative(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_both_negative(
-; CHECK-NEXT:    [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT:    [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30))
 ; CHECK-NEXT:    ret <2 x i8> [[U2]]
 ;
   %u1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 -10, i8 -10>)
@@ -177,7 +177,7 @@ define i8 @test_scalar_uadd_neg_neg(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_neg_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_neg_neg(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %a_neg = or <2 x i8> %a, <i8 -128, i8 -128>
   %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_neg, <2 x i8> <i8 -10, i8 -20>)
@@ -198,7 +198,7 @@ define i8 @test_scalar_uadd_nneg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_nneg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_nneg_nneg(
-; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[R:%.*]] = add nuw <2 x i8> [[A_NNEG]], <i8 10, i8 20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -221,7 +221,7 @@ define i8 @test_scalar_uadd_neg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_neg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_neg_nneg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 10, i8 20>)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -243,8 +243,8 @@ define i8 @test_scalar_uadd_never_overflows(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_never_overflows(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_never_overflows(
-; CHECK-NEXT:    [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], <i8 1, i8 1>
+; CHECK-NEXT:    [[A_MASKED:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 -127)
+; CHECK-NEXT:    [[R:%.*]] = add nuw nsw <2 x i8> [[A_MASKED]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a_masked = and <2 x i8> %a, <i8 129, i8 129>
@@ -263,7 +263,7 @@ define i8 @test_scalar_uadd_always_overflows(i8 %a) {
 
 define <2 x i8> @test_vector_uadd_always_overflows(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_uadd_always_overflows(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %a_masked = or <2 x i8> %a, <i8 192, i8 192>
   %r = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a_masked, <2 x i8> <i8 64, i8 64>)
@@ -284,7 +284,7 @@ define i8 @test_scalar_sadd_neg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_neg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_neg_nneg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], <i8 10, i8 20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -307,7 +307,7 @@ define i8 @test_scalar_sadd_nneg_neg(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_nneg_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_nneg_neg(
-; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], <i8 -10, i8 -20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -330,7 +330,7 @@ define i8 @test_scalar_sadd_neg_neg(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_neg_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_neg_neg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 -10, i8 -20>)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -474,7 +474,7 @@ define i8 @test_scalar_ssub_canonical(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_canonical(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_canonical(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -10, i8 -10>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -10))
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %r = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
@@ -550,7 +550,7 @@ define i8 @test_simplify_decrement_ne(i8 %a) {
 
 define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @test_simplify_decrement_vec(
-; CHECK-NEXT:    [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 1, i8 1>)
+; CHECK-NEXT:    [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1))
 ; CHECK-NEXT:    ret <2 x i8> [[I2]]
 ;
   %i = icmp eq <2 x i8> %a, <i8 0, i8 0>
@@ -561,7 +561,7 @@ define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) {
 
 define <2 x i8> @test_simplify_decrement_vec_poison(<2 x i8> %a) {
 ; CHECK-LABEL: @test_simplify_decrement_vec_poison(
-; CHECK-NEXT:    [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 1, i8 1>)
+; CHECK-NEXT:    [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 1))
 ; CHECK-NEXT:    ret <2 x i8> [[I2]]
 ;
   %i = icmp eq <2 x i8> %a, <i8 0, i8 0>
@@ -636,7 +636,7 @@ define i8 @test_invalid_simplify_other(i8 %a, i8 %b) {
 
 define <2 x i8> @test_vector_usub_combine(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_combine(
-; CHECK-NEXT:    [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT:    [[X2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30))
 ; CHECK-NEXT:    ret <2 x i8> [[X2]]
 ;
   %x1 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
@@ -688,7 +688,7 @@ define i8 @test_scalar_ssub_both_positive(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_both_positive(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_both_positive(
-; CHECK-NEXT:    [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT:    [[Z2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30))
 ; CHECK-NEXT:    ret <2 x i8> [[Z2]]
 ;
   %z1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 10, i8 10>)
@@ -708,7 +708,7 @@ define i8 @test_scalar_ssub_both_negative(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_both_negative(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_both_negative(
-; CHECK-NEXT:    [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 30, i8 30>)
+; CHECK-NEXT:    [[U2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 30))
 ; CHECK-NEXT:    ret <2 x i8> [[U2]]
 ;
   %u1 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 -10, i8 -10>)
@@ -741,7 +741,7 @@ define i8 @test_scalar_sadd_ssub(i8 %a) {
 
 define <2 x i8> @test_vector_sadd_ssub(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_sadd_ssub(
-; CHECK-NEXT:    [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 -30, i8 -30>)
+; CHECK-NEXT:    [[V2:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 -30))
 ; CHECK-NEXT:    ret <2 x i8> [[V2]]
 ;
   %v1 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 -10, i8 -10>, <2 x i8> %a)
@@ -794,7 +794,7 @@ define i8 @test_scalar_usub_neg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_usub_neg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_neg_nneg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[A_NEG]], <i8 -10, i8 -20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -817,7 +817,7 @@ define i8 @test_scalar_usub_nneg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_usub_nneg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_nneg_nneg(
-; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A_NNEG]], <2 x i8> <i8 10, i8 20>)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -839,8 +839,8 @@ define i8 @test_scalar_usub_never_overflows(i8 %a) {
 
 define <2 x i8> @test_vector_usub_never_overflows(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_never_overflows(
-; CHECK-NEXT:    [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], <i8 64, i8 64>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], <i8 -10, i8 -10>
+; CHECK-NEXT:    [[A_MASKED:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 64)
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_MASKED]], splat (i8 -10)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a_masked = or <2 x i8> %a, <i8 64, i8 64>
@@ -880,7 +880,7 @@ define i8 @test_scalar_ssub_neg_neg(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_neg_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_neg_neg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_NEG]], <i8 10, i8 20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -903,7 +903,7 @@ define i8 @test_scalar_ssub_nneg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_nneg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_nneg_nneg(
-; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[A_NNEG:%.*]] = and <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[A_NNEG]], <i8 -10, i8 -20>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -926,7 +926,7 @@ define i8 @test_scalar_ssub_neg_nneg(i8 %a) {
 
 define <2 x i8> @test_vector_ssub_neg_nneg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_ssub_neg_nneg(
-; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[A_NEG:%.*]] = or <2 x i8> [[A:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A_NEG]], <2 x i8> <i8 -10, i8 -20>)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1044,7 +1044,7 @@ define i8 @test_scalar_usub_add_nuw_inferred(i8 %a) {
 
 define <2 x i8> @test_vector_usub_add_nuw_no_ov(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov(
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[A:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[A:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %b = add nuw <2 x i8> %a, <i8 10, i8 10>
@@ -1076,7 +1076,7 @@ define <3 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat1_poison(<3 x i8> %a) {
 define <2 x i8> @test_vector_usub_add_nuw_no_ov_nonsplat2(<2 x i8> %a) {
 ; CHECK-LABEL: @test_vector_usub_add_nuw_no_ov_nonsplat2(
 ; CHECK-NEXT:    [[B:%.*]] = add nuw <2 x i8> [[A:%.*]], <i8 10, i8 9>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> <i8 9, i8 9>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[B]], <2 x i8> splat (i8 9))
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %b = add nuw <2 x i8> %a, <i8 10, i8 9>
@@ -1124,8 +1124,8 @@ define i8 @test_scalar_ssub_add_nsw_may_ov(i8 %a, i8 %b) {
 
 define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_splat(
-; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 7>
-; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7)
+; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1137,7 +1137,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_splat(<2 x i8> %a, <2 x i8> %b)
 
 define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat1(
-; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 6>
 ; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> [[AA]], [[BB]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
@@ -1151,7 +1151,7 @@ define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat1(<2 x i8> %a, <2 x i8>
 define <2 x i8> @test_vector_ssub_add_nsw_no_ov_nonsplat2(<2 x i8> %a, <2 x i8> %b) {
 ; CHECK-LABEL: @test_vector_ssub_add_nsw_no_ov_nonsplat2(
 ; CHECK-NEXT:    [[AA:%.*]] = add nsw <2 x i8> [[A:%.*]], <i8 7, i8 8>
-; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[BB:%.*]] = and <2 x i8> [[B:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[AA]], <2 x i8> [[BB]])
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1459,7 +1459,7 @@ define i32 @uadd_sat_negative_one(i32 %x) {
 
 define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @uadd_sat_flipped4_vector(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 9, i8 9>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9))
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 -10, i8 -10>
@@ -1470,7 +1470,7 @@ define <2 x i8> @uadd_sat_flipped4_vector(<2 x i8> %x) {
 
 define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 9, i8 9>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9))
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 -10, i8 poison>
@@ -1481,7 +1481,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector(<2 x i8> %x) {
 
 define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) {
 ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 9, i8 9>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 9))
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 -10, i8 poison>
@@ -1492,7 +1492,7 @@ define <2 x i8> @uadd_sat_flipped4_poison_vector_compare(<2 x i8> %x) {
 
 define <2 x i8> @uadd_sat_flipped4_poison_vector_compare2(<2 x i8> %x) {
 ; CHECK-LABEL: @uadd_sat_flipped4_poison_vector_compare2(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %cmp = icmp ult <2 x i8> %x, <i8 -10, i8 poison>
   %add = add <2 x i8> %x, <i8 poison, i8 poison>
@@ -1878,7 +1878,7 @@ define i32 @uadd_sat_not_uge(i32 %x, i32 %y) {
 define <2 x i32> @uadd_sat_not_ugt_commute_add(<2 x i32> %x, <2 x i32> %yp) {
 ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
-; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -1935,7 +1935,7 @@ define <2 x i32> @uadd_sat_not_commute_select_ugt(<2 x i32> %xp, <2 x i32> %yp)
 ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt(
 ; CHECK-NEXT:    [[X:%.*]] = urem <2 x i32> <i32 42, i32 -42>, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i32> <i32 12, i32 412>, [[YP:%.*]]
-; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -2042,7 +2042,7 @@ define i32 @uadd_sat_canon_y_nuw(i32 %x, i32 %y) {
 
 define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -2053,7 +2053,7 @@ define <4 x i32> @uadd_sat_constant_vec(<4 x i32> %x) {
 
 define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec_commute(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
@@ -2064,7 +2064,7 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
 
 define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs(
-; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    [[R:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 poison>
@@ -2183,7 +2183,7 @@ define i32 @unsigned_sat_constant_using_min(i32 %x) {
 
 define <2 x i32> @unsigned_sat_constant_using_min_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @unsigned_sat_constant_using_min_splat(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -15, i32 -15>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -15))
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %c = icmp ult <2 x i32> %x, <i32 14, i32 14>
diff --git a/llvm/test/Transforms/InstCombine/scalarization.ll b/llvm/test/Transforms/InstCombine/scalarization.ll
index 591437b72c1fc4..a6931b4c41d2d7 100644
--- a/llvm/test/Transforms/InstCombine/scalarization.ll
+++ b/llvm/test/Transforms/InstCombine/scalarization.ll
@@ -161,7 +161,7 @@ define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
 define i8 @extract_element_binop_splat_variable_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) {
 ;
 ; CHECK-LABEL: @extract_element_binop_splat_variable_index_may_trap(
-; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i8> <i8 42, i8 42, i8 42, i8 42>, [[Y:%.*]]
+; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i8> splat (i8 42), [[Y:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll
index 3184395ccb12b8..e3e4a0c4a8f9cb 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-canonicalize.ll
@@ -107,8 +107,8 @@ define i32 @sdiv_abs_nsw(i32 %x) {
 
 define <4 x i32> @sdiv_abs_nsw_vec(<4 x i32> %x) {
 ; CHECK-LABEL: @sdiv_abs_nsw_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 1), <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
   %a = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true)
diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
index 3451c5e626f308..a3133ef82d5525 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll
@@ -27,7 +27,7 @@ define i8 @n1(i8 %x) {
 
 define <2 x i8> @t2_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t2_vec_splat(
-; CHECK-NEXT:    [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[DIV]]
 ;
@@ -100,8 +100,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
-; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 5, i8 5>
-; CHECK-NEXT:    [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], <i8 4, i8 4>
+; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 5)
+; CHECK-NEXT:    [[D_NEG:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 4)
 ; CHECK-NEXT:    [[D:%.*]] = sub nsw <2 x i8> zeroinitializer, [[D_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
index 36af8685f9ae39..f463f27acc882f 100644
--- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
+++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-power-of-two.ll
@@ -37,7 +37,7 @@ define i8 @n2(i8 %x) {
 
 define <2 x i8> @t3_vec_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @t3_vec_splat(
-; CHECK-NEXT:    [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[DIV:%.*]] = ashr exact <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i8> [[DIV]]
 ;
   %div = sdiv exact <2 x i8> %x, <i8 32, i8 32>
@@ -144,8 +144,8 @@ define i8 @not_prove_exact_with_high_mask(i8 %x, i8 %y) {
 
 define <2 x i8> @prove_exact_with_high_mask_splat_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @prove_exact_with_high_mask_splat_vec(
-; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], <i8 3, i8 3>
-; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i8> [[A]], <i8 3, i8 3>
+; CHECK-NEXT:    [[A:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 3)
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i8> [[A]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
   %a = shl <2 x i8> %x, <i8 3, i8 3>
diff --git a/llvm/test/Transforms/InstCombine/select-bitext.ll b/llvm/test/Transforms/InstCombine/select-bitext.ll
index 4c09448aa83391..553d5ae155153f 100644
--- a/llvm/test/Transforms/InstCombine/select-bitext.ll
+++ b/llvm/test/Transforms/InstCombine/select-bitext.ll
@@ -51,7 +51,7 @@ define i64 @sel_sext(i32 %a, i1 %cmp) {
 define <4 x i64> @sel_sext_vec(<4 x i32> %a, <4 x i1> %cmp) {
 ; CHECK-LABEL: @sel_sext_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sext <4 x i32> [[A:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42)
 ; CHECK-NEXT:    ret <4 x i64> [[EXT]]
 ;
   %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
@@ -73,7 +73,7 @@ define i64 @sel_zext(i32 %a, i1 %cmp) {
 define <4 x i64> @sel_zext_vec(<4 x i32> %a, <4 x i1> %cmp) {
 ; CHECK-LABEL: @sel_zext_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i32> [[A:%.*]] to <4 x i64>
-; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[EXT:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i64> [[TMP1]], <4 x i64> splat (i64 42)
 ; CHECK-NEXT:    ret <4 x i64> [[EXT]]
 ;
   %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> <i32 42, i32 42, i32 42, i32 42>
@@ -152,8 +152,8 @@ define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) {
 
 define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_equal_sext_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], splat (i32 16)
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> <i32 42, i32 43>
 ; CHECK-NEXT:    ret <2 x i32> [[EXT]]
 ;
@@ -178,7 +178,7 @@ define i64 @trunc_sel_larger_zext(i32 %a, i1 %cmp) {
 
 define <2 x i64> @trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_larger_zext_vec(
-; CHECK-NEXT:    [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg <2 x i32> [[TRUNC_MASK]] to <2 x i64>
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> <i64 42, i64 43>
 ; CHECK-NEXT:    ret <2 x i64> [[EXT]]
@@ -205,7 +205,7 @@ define i32 @trunc_sel_smaller_zext(i64 %a, i1 %cmp) {
 define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_smaller_zext_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65535)
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> <i32 42, i32 43>
 ; CHECK-NEXT:    ret <2 x i32> [[EXT]]
 ;
@@ -229,7 +229,7 @@ define i32 @trunc_sel_equal_zext(i32 %a, i1 %cmp) {
 
 define <2 x i32> @trunc_sel_equal_zext_vec(<2 x i32> %a, <2 x i1> %cmp) {
 ; CHECK-LABEL: @trunc_sel_equal_zext_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 65535)
 ; CHECK-NEXT:    [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> <i32 42, i32 43>
 ; CHECK-NEXT:    ret <2 x i32> [[EXT]]
 ;
@@ -578,7 +578,7 @@ define i32 @sext_true_val_must_be_all_ones(i1 %x) {
 
 define <2 x i32> @sext_true_val_must_be_all_ones_vec(<2 x i1> %x) {
 ; CHECK-LABEL: @sext_true_val_must_be_all_ones_vec(
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 42, i32 12>, !prof [[PROF0]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 -1), <2 x i32> <i32 42, i32 12>, !prof [[PROF0]]
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %ext = sext <2 x i1> %x to <2 x i32>
@@ -598,7 +598,7 @@ define i32 @zext_true_val_must_be_one(i1 %x) {
 
 define <2 x i32> @zext_true_val_must_be_one_vec(<2 x i1> %x) {
 ; CHECK-LABEL: @zext_true_val_must_be_one_vec(
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 42, i32 12>, !prof [[PROF0]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 1), <2 x i32> <i32 42, i32 12>, !prof [[PROF0]]
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %ext = zext <2 x i1> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/select-divrem.ll b/llvm/test/Transforms/InstCombine/select-divrem.ll
index e11afd7b543b20..a674f9c64b2001 100644
--- a/llvm/test/Transforms/InstCombine/select-divrem.ll
+++ b/llvm/test/Transforms/InstCombine/select-divrem.ll
@@ -285,7 +285,7 @@ define i32 @rem_euclid_wrong_operands_select(i32 %0) {
 
 define <2 x i32> @rem_euclid_vec(<2 x i32> %0) {
 ; CHECK-LABEL: @rem_euclid_vec(
-; CHECK-NEXT:    [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 7, i32 7>
+; CHECK-NEXT:    [[SEL:%.*]] = and <2 x i32> [[TMP0:%.*]], splat (i32 7)
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %rem = srem <2 x i32> %0, <i32 8, i32 8>
diff --git a/llvm/test/Transforms/InstCombine/select-extractelement.ll b/llvm/test/Transforms/InstCombine/select-extractelement.ll
index e5b4fe5051e103..621d278dee6e5f 100644
--- a/llvm/test/Transforms/InstCombine/select-extractelement.ll
+++ b/llvm/test/Transforms/InstCombine/select-extractelement.ll
@@ -215,7 +215,7 @@ define <4 x i32> @extract_cond_type_mismatch(<4 x i32> %x, <4 x i32> %y, <5 x i1
 
 define i32 @inf_loop_partial_undef(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @inf_loop_partial_undef(
-; CHECK-NEXT:    [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[T5:%.*]] = add nsw <2 x i32> [[Y:%.*]], splat (i32 2147483647)
 ; CHECK-NEXT:    [[T6:%.*]] = icmp sge <2 x i32> [[T5]], [[X:%.*]]
 ; CHECK-NEXT:    [[AB:%.*]] = and <2 x i1> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT:    [[T7:%.*]] = select <2 x i1> [[AB]], <2 x i1> [[T6]], <2 x i1> <i1 true, i1 poison>
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll
index fe794a33ece2d2..d62edf2fe39330 100644
--- a/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll
+++ b/llvm/test/Transforms/InstCombine/select-icmp-and-zero-shl.ll
@@ -17,7 +17,7 @@ define i32 @test_eq(i32 %x) {
 
 define <2 x i32> @test_eq_vect(<2 x i32> %x) {
 ; CHECK-LABEL: @test_eq_vect(
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %shl.mask = and <2 x i32> %x, <i32 1073741823, i32 1073741823>
@@ -41,7 +41,7 @@ define i32 @test_ne(i32 %x) {
 
 define <2 x i32> @test_ne_vect(<2 x i32> %x) {
 ; CHECK-LABEL: @test_ne_vect(
-; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[MUL]]
 ;
   %shl.mask = and <2 x i32> %x, <i32 1073741823, i32 1073741823>
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
index a57a7c5e32e494..219a66c314a07d 100644
--- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll
+++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
@@ -40,8 +40,8 @@ define i32 @test35(i32 %x) {
 
 define <2 x i32> @test35vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test35vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100)
 ; CHECK-NEXT:    ret <2 x i32> [[COND]]
 ;
   %cmp = icmp sge <2 x i32> %x, <i32 0, i32 0>
@@ -77,7 +77,7 @@ define i32 @test36(i32 %x) {
 define <2 x i32> @test36vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test36vec(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt <2 x i32> [[X:%.*]], zeroinitializer
-; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 60, i32 60>, <2 x i32> <i32 100, i32 100>
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 60), <2 x i32> splat (i32 100)
 ; CHECK-NEXT:    ret <2 x i32> [[COND]]
 ;
   %cmp = icmp slt <2 x i32> %x, <i32 0, i32 0>
@@ -98,8 +98,8 @@ define i32 @test37(i32 %x) {
 
 define <2 x i32> @test37vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test37vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> <i32 1, i32 1>, <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[CMP]], <2 x i32> splat (i32 1), <2 x i32> splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[COND]]
 ;
   %cmp = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
@@ -122,9 +122,9 @@ define i32 @test65(i64 %x) {
 
 define <2 x i32> @test65vec(<2 x i64> %x) {
 ; CHECK-LABEL: @test65vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 16)
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = and <2 x i64> %x, <i64 16, i64 16>
@@ -148,9 +148,9 @@ define i32 @test66(i64 %x) {
 
 define <2 x i32> @test66vec(<2 x i64> %x) {
 ; CHECK-LABEL: @test66vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], <i64 4294967296, i64 4294967296>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[X:%.*]], splat (i64 4294967296)
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <2 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = and <2 x i64> %x, <i64 4294967296, i64 4294967296>
@@ -164,7 +164,7 @@ define <2 x i32> @test66vec_scalar_and(i64 %x) {
 ; CHECK-LABEL: @test66vec_scalar_and(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967296
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = and i64 %x, 4294967296
@@ -188,9 +188,9 @@ define i32 @test67(i16 %x) {
 
 define <2 x i32> @test67vec(<2 x i16> %x) {
 ; CHECK-LABEL: @test67vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 4, i16 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], splat (i16 4)
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <2 x i16> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = and <2 x i16> %x, <i16 4, i16 4>
@@ -214,9 +214,9 @@ define i32 @test71(i32 %x) {
 
 define <2 x i32> @test71vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test71vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> <i32 42, i32 42>, <2 x i32> <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[DOTNOT]], <2 x i32> splat (i32 42), <2 x i32> splat (i32 40)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = and <2 x i32> %x, <i32 128, i32 128>
@@ -240,9 +240,9 @@ define i32 @test72(i32 %x) {
 
 define <2 x i32> @test72vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test72vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %1 = and <2 x i32> %x, <i32 128, i32 128>
@@ -266,9 +266,9 @@ define i32 @test73(i32 %x) {
 
 define <2 x i32> @test73vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test73vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %1 = trunc <2 x i32> %x to <2 x i8>
@@ -290,8 +290,8 @@ define i32 @test74(i32 %x) {
 
 define <2 x i32> @test74vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test74vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 40, i32 40>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 40), <2 x i32> splat (i32 42)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %1 = icmp sgt <2 x i32> %x, <i32 -1, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
index 0c7624018cb02c..4495e7f6ed067a 100644
--- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
@@ -22,7 +22,7 @@ define i32 @and_lshr_and(i32 %arg) {
 
 define <2 x i32> @and_lshr_and_splatvec(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_lshr_and_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[T4:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T4]]
@@ -111,7 +111,7 @@ define i32 @and_and(i32 %arg) {
 
 define <2 x i32> @and_and_splatvec(<2 x i32> %arg) {
 ; CHECK-LABEL: @and_and_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[T3:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
@@ -190,7 +190,7 @@ define i32 @f_var0_commutative_and(i32 %arg, i32 %arg1) {
 
 define <2 x i32> @f_var0_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var0_splatvec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[T5:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
@@ -270,7 +270,7 @@ define i32 @f_var1_commutative_and(i32 %arg, i32 %arg1) {
 
 define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[ARG1:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[ARG:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[T4:%.*]] = zext <2 x i1> [[TMP3]] to <2 x i32>
@@ -285,7 +285,7 @@ define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 
 define <3 x i32> @f_var1_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) {
 ; CHECK-LABEL: @f_var1_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[ARG:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[T4:%.*]] = zext <3 x i1> [[TMP3]] to <3 x i32>
@@ -321,11 +321,11 @@ define i32 @f_var2(i32 %arg, i32 %arg1) {
 
 define <2 x i32> @f_var2_splatvec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-LABEL: @f_var2_splatvec(
-; CHECK-NEXT:    [[T:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[T:%.*]] = and <2 x i32> [[ARG:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer
 ; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1)
+; CHECK-NEXT:    [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T5]]
 ;
   %t = and <2 x i32> %arg, <i32 1, i32 1>
@@ -341,8 +341,8 @@ define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) {
 ; CHECK-NEXT:    [[T:%.*]] = and <2 x i32> [[ARG:%.*]], <i32 2, i32 1>
 ; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer
 ; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[ARG]], [[ARG1:%.*]]
-; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T3]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[T4:%.*]] = and <2 x i32> [[T3]], splat (i32 1)
+; CHECK-NEXT:    [[T5:%.*]] = select <2 x i1> [[T2]], <2 x i32> [[T4]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T5]]
 ;
   %t = and <2 x i32> %arg, <i32 2, i32 1>; mask is not splat
@@ -414,8 +414,8 @@ define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %ar
 ; CHECK-NEXT:    [[T:%.*]] = and <2 x i32> [[ARG:%.*]], [[ARG1:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp eq <2 x i32> [[T]], zeroinitializer
 ; CHECK-NEXT:    [[T4:%.*]] = lshr <2 x i32> [[ARG]], [[ARG2:%.*]]
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], <i32 1, i32 1>
-; CHECK-NEXT:    [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 1)
+; CHECK-NEXT:    [[T6:%.*]] = select <2 x i1> [[T3]], <2 x i32> [[T5]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T6]]
 ;
   %t = and <2 x i32> %arg, %arg1
diff --git a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
index e0306972e48e2c..d726b5cd8ce1a3 100644
--- a/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
+++ b/llvm/test/Transforms/InstCombine/select-safe-transforms.ll
@@ -223,8 +223,8 @@ define i1 @andn_or_cmp_2_partial_logical_commute(i16 %a, i16 %b) {
 
 define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) {
 ; CHECK-LABEL: @not_logical_or(
-; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1)
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[IMPLIED]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <2 x i1> [[COND]], <2 x i1> [[OR]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
@@ -240,8 +240,8 @@ define <2 x i1> @not_logical_or(i1 %b, <2 x i32> %a) {
 
 define <2 x i1> @not_logical_or2(i1 %b, <2 x i32> %a) {
 ; CHECK-LABEL: @not_logical_or2(
-; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp slt <2 x i32> [[A]], splat (i32 -1)
 ; CHECK-NEXT:    [[OR:%.*]] = select i1 [[B:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[IMPLIED]]
 ; CHECK-NEXT:    [[AND:%.*]] = select <2 x i1> [[OR]], <2 x i1> [[COND]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[AND]]
@@ -764,8 +764,8 @@ define i1 @orn_and_cmp_2_partial_logical_commute(i16 %a, i16 %b) {
 
 define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) {
 ; CHECK-LABEL: @not_logical_and(
-; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], <i32 1, i32 1>
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1)
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[OR:%.*]] = select <2 x i1> [[IMPLIED]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[AND]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
@@ -781,8 +781,8 @@ define <2 x i1> @not_logical_and(i1 %b, <2 x i32> %a) {
 
 define <2 x i1> @not_logical_and2(i1 %b, <2 x i32> %a) {
 ; CHECK-LABEL: @not_logical_and2(
-; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], <i32 1, i32 1>
+; CHECK-NEXT:    [[COND:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[IMPLIED:%.*]] = icmp ugt <2 x i32> [[A]], splat (i32 1)
 ; CHECK-NEXT:    [[AND:%.*]] = select i1 [[B:%.*]], <2 x i1> [[COND]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[OR:%.*]] = select <2 x i1> [[AND]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[IMPLIED]]
 ; CHECK-NEXT:    ret <2 x i1> [[OR]]
diff --git a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
index 1647233595b37e..30e763ccea5900 100644
--- a/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
+++ b/llvm/test/Transforms/InstCombine/select-with-bitwise-ops.ll
@@ -22,8 +22,8 @@ define i32 @select_icmp_eq_and_1_0_or_2(i32 %x, i32 %y) {
 
 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec(
-; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -37,7 +37,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec(<2 x i32> %x, <2 x i32> %y) {
 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison1(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 1, i32 poison>
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw <2 x i32> [[AND]], splat (i32 1)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -50,8 +50,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison1(<2 x i32> %x, <2 x i32
 
 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison2(
-; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -64,8 +64,8 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison2(<2 x i32> %x, <2 x i32
 
 define <2 x i32> @select_icmp_eq_and_1_0_or_2_vec_poison3(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_2_vec_poison3(
-; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -121,8 +121,8 @@ define i32 @select_icmp_eq_and_32_0_or_8(i32 %x, i32 %y) {
 
 define <2 x i32> @select_icmp_eq_and_32_0_or_8_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_32_0_or_8_vec(
-; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 8, i32 8>
+; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 8)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -178,8 +178,8 @@ define i32 @select_icmp_ne_0_and_4096_or_4096(i32 %x, i32 %y) {
 
 define <2 x i32> @select_icmp_ne_0_and_4096_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_ne_0_and_4096_or_4096_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096>
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 4096, i32 4096>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096)
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 4096)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -234,7 +234,7 @@ define i32 @select_icmp_eq_and_4096_0_or_4096(i32 %x, i32 %y) {
 
 define <2 x i32> @select_icmp_eq_and_4096_0_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_4096_0_or_4096_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 4096, i32 4096>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4096)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[AND]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -290,7 +290,7 @@ define i32 @select_icmp_eq_0_and_1_or_1(i64 %x, i32 %y) {
 define <2 x i32> @select_icmp_eq_0_and_1_or_1_vec(<2 x i64> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_0_and_1_or_1_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -392,9 +392,9 @@ define i32 @select_icmp_ne_0_and_32_or_4096(i32 %x, i32 %y) {
 
 define <2 x i32> @select_icmp_ne_0_and_32_or_4096_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_ne_0_and_32_or_4096_vec(
-; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], <i32 7, i32 7>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 4096, i32 4096>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], <i32 4096, i32 4096>
+; CHECK-NEXT:    [[AND:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 7)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 4096)
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 4096)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -531,7 +531,7 @@ define <2 x i32> @select_icmp_eq_and_1_0_or_vector_of_2s(i32 %x, <2 x i32> %y) {
 ; CHECK-LABEL: @select_icmp_eq_and_1_0_or_vector_of_2s(
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X:%.*]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[Y:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[Y:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x i32> [[Y]], <2 x i32> [[OR]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -612,8 +612,8 @@ define i64 @select_icmp_x_and_8_ne_0_y_or_8(i32 %x, i64 %y) {
 
 define <2 x i64> @select_icmp_x_and_8_ne_0_y_or_8_vec(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @select_icmp_x_and_8_ne_0_y_or_8_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 8, i32 8>
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[AND]], <i32 8, i32 8>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 8)
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[AND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[OR_Y:%.*]] = or <2 x i64> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i64> [[OR_Y]]
@@ -692,8 +692,8 @@ define i32 @test68(i32 %x, i32 %y) {
 
 define <2 x i32> @test68vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test68vec(
-; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -750,9 +750,9 @@ define i32 @test69(i32 %x, i32 %y) {
 
 define <2 x i32> @test69vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test69vec(
-; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 6, i32 6>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], <i32 2, i32 2>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[AND:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 6)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[AND]], splat (i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    [[SELECT:%.*]] = or <2 x i32> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
 ;
@@ -1563,8 +1563,8 @@ define i8 @set_bits_extra_use2(i8 %x, i1 %b)  {
 
 define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b)  {
 ; CHECK-LABEL: @clear_bits(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 37, i8 37>
-; CHECK-NEXT:    [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> <i8 -38, i8 -38>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37)
+; CHECK-NEXT:    [[MASKSEL:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38)
 ; CHECK-NEXT:    [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]]
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
@@ -1579,7 +1579,7 @@ define <2 x i8> @clear_bits(<2 x i8> %x, <2 x i1> %b)  {
 define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b)  {
 ; CHECK-LABEL: @clear_bits_not_inverse_constant(
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 undef, i8 37>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[X]], <i8 -38, i8 -38>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i8> [[X]], splat (i8 -38)
 ; CHECK-NEXT:    [[COND:%.*]] = select <2 x i1> [[B:%.*]], <2 x i8> [[AND]], <2 x i8> [[OR]]
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
@@ -1591,9 +1591,9 @@ define <2 x i8> @clear_bits_not_inverse_constant(<2 x i8> %x, <2 x i1> %b)  {
 
 define <2 x i8> @clear_bits_extra_use1(<2 x i8> %x, i1 %b)  {
 ; CHECK-LABEL: @clear_bits_extra_use1(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 37, i8 37>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 37)
 ; CHECK-NEXT:    call void @use_vec(<2 x i8> [[AND]])
-; CHECK-NEXT:    [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> <i8 -38, i8 -38>
+; CHECK-NEXT:    [[MASKSEL:%.*]] = select i1 [[B:%.*]], <2 x i8> zeroinitializer, <2 x i8> splat (i8 -38)
 ; CHECK-NEXT:    [[COND:%.*]] = or disjoint <2 x i8> [[AND]], [[MASKSEL]]
 ; CHECK-NEXT:    ret <2 x i8> [[COND]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index 7d62b419424405..7a0831ee678862 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -1447,7 +1447,7 @@ define i32 @select_icmp_slt0_xor(i32 %x) {
 
 define <2 x i32> @select_icmp_slt0_xor_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @select_icmp_slt0_xor_vec(
-; CHECK-NEXT:    [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[X_XOR:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i32> [[X_XOR]]
 ;
   %cmp = icmp slt <2 x i32> %x, zeroinitializer
@@ -1531,7 +1531,7 @@ define <4 x float> @PR33721(<4 x float> %w) {
 ; CHECK-LABEL: @PR33721(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = fcmp ole <4 x float> [[W:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, <4 x float> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x float> splat (float 0xFFFFFFFFE0000000), <4 x float> zeroinitializer
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
 entry:
@@ -1603,7 +1603,7 @@ define i32 @test_shl_zext_bool(i1 %t) {
 
 define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) {
 ; CHECK-LABEL: @test_shl_zext_bool_splat(
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> <i32 8, i32 8>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %r = select <2 x i1> %t, <2 x i32> <i32 8, i32 8>, <2 x i32> zeroinitializer
@@ -1644,7 +1644,7 @@ define float @copysign1_fmf(float %x) {
 define <2 x float> @copysign2(<2 x float> %x) {
 ; CHECK-LABEL: @copysign2(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %i = bitcast <2 x float> %x to <2 x i32>
@@ -1668,7 +1668,7 @@ define float @copysign3(float %x) {
 define <2 x float> @copysign_vec_poison(<2 x float> %x) {
 ; CHECK-LABEL: @copysign_vec_poison(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fneg <2 x float> [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> [[TMP1]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %i = bitcast <2 x float> %x to <2 x i32>
@@ -1679,7 +1679,7 @@ define <2 x float> @copysign_vec_poison(<2 x float> %x) {
 
 define <2 x float> @copysign_vec_poison1(<2 x float> %x) {
 ; CHECK-LABEL: @copysign_vec_poison1(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %i = bitcast <2 x float> %x to <2 x i32>
@@ -1690,7 +1690,7 @@ define <2 x float> @copysign_vec_poison1(<2 x float> %x) {
 
 define <2 x float> @copysign_vec_poison3(<2 x float> %x) {
 ; CHECK-LABEL: @copysign_vec_poison3(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> <float 4.200000e+01, float 4.200000e+01>, <2 x float> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x float> @llvm.copysign.v2f32(<2 x float> splat (float 4.200000e+01), <2 x float> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %i = bitcast <2 x float> %x to <2 x i32>
@@ -1739,7 +1739,7 @@ define <2 x float> @copysign_type_mismatch2(<2 x float> %x) {
 ; CHECK-LABEL: @copysign_type_mismatch2(
 ; CHECK-NEXT:    [[I:%.*]] = bitcast <2 x float> [[X:%.*]] to i64
 ; CHECK-NEXT:    [[ISPOS:%.*]] = icmp sgt i64 [[I]], -1
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[ISPOS]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[ISPOS]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float -1.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
   %i = bitcast <2 x float> %x to i64
@@ -2762,8 +2762,8 @@ define i8 @select_replacement_add_eq(i8 %x, i8 %y) {
 
 define <2 x i8> @select_replacement_add_eq_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @select_replacement_add_eq_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 1>
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 2>, <2 x i8> [[Y:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> splat (i8 2), <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SEL]]
 ;
   %cmp = icmp eq <2 x i8> %x, <i8 1, i8 1>
@@ -2799,7 +2799,7 @@ define <2 x i8> @select_replacement_add_eq_vec_poison(<2 x i8> %x, <2 x i8> %y)
 define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @select_replacement_add_eq_vec_undef(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 undef>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], <i8 1, i8 1>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[X]], splat (i8 1)
 ; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[ADD]], <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SEL]]
 ;
@@ -2811,7 +2811,7 @@ define <2 x i8> @select_replacement_add_eq_vec_undef(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i8> @select_replacement_add_eq_vec_undef_okay(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @select_replacement_add_eq_vec_undef_okay(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[CMP]], <2 x i8> <i8 2, i8 undef>, <2 x i8> [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[SEL]]
 ;
@@ -3370,7 +3370,7 @@ define i32 @select_cond_zext_cond(i1 %cond, i32 %b) {
 
 define <2 x i32> @select_cond_zext_cond_vec(<2 x i1> %cond, <2 x i32> %b) {
 ; CHECK-LABEL: @select_cond_zext_cond_vec(
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> <i32 1, i32 1>, <2 x i32> [[B:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 1), <2 x i32> [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %zext = zext <2 x i1> %cond to <2 x i32>
@@ -3390,7 +3390,7 @@ define i32 @select_cond_sext_cond(i1 %cond, i32 %b) {
 
 define <2 x i32> @select_cond_sext_cond_vec(<2 x i1> %cond, <2 x i32> %b) {
 ; CHECK-LABEL: @select_cond_sext_cond_vec(
-; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[B:%.*]]
+; CHECK-NEXT:    [[SEL:%.*]] = select <2 x i1> [[COND:%.*]], <2 x i32> splat (i32 -1), <2 x i32> [[B:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[SEL]]
 ;
   %sext = sext <2 x i1> %cond to <2 x i32>
@@ -3582,7 +3582,7 @@ define i32 @not_clamp_umin2(i32 %x) {
 
 define <2 x i8> @clamp_umaxval(<2 x i8> %x) {
 ; CHECK-LABEL: @clamp_umaxval(
-; CHECK-NEXT:    [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 -2, i8 -2>)
+; CHECK-NEXT:    [[SEL:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 -2))
 ; CHECK-NEXT:    ret <2 x i8> [[SEL]]
 ;
   %cmp = icmp eq <2 x i8> %x, <i8 255, i8 255>
@@ -3667,7 +3667,7 @@ define i8 @not_clamp_smin2(i8 %x) {
 
 define <2 x i8> @clamp_smaxval(<2 x i8> %x) {
 ; CHECK-LABEL: @clamp_smaxval(
-; CHECK-NEXT:    [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 126, i8 126>)
+; CHECK-NEXT:    [[SEL:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 126))
 ; CHECK-NEXT:    ret <2 x i8> [[SEL]]
 ;
   %cmp = icmp eq <2 x i8> %x, <i8 127, i8 127>
diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll
index d8f945b8d1b32f..f22b5a4fd3c5f2 100644
--- a/llvm/test/Transforms/InstCombine/select_meta.ll
+++ b/llvm/test/Transforms/InstCombine/select_meta.ll
@@ -371,7 +371,7 @@ define double @select_fmul(i1 %cond, double %x, double %y) {
 
 define <2 x float> @select_fdiv(i1 %cond, <2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @select_fdiv(
-; CHECK-NEXT:    [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, !prof [[PROF0]], !unpredictable [[META2]]
+; CHECK-NEXT:    [[OP:%.*]] = select nnan i1 [[COND:%.*]], <2 x float> [[Y:%.*]], <2 x float> splat (float 1.000000e+00), !prof [[PROF0]], !unpredictable [[META2]]
 ; CHECK-NEXT:    [[RET:%.*]] = fdiv <2 x float> [[X:%.*]], [[OP]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll
index a3c8d3393d04fa..e6e1ea689521d8 100644
--- a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll
+++ b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll
@@ -187,8 +187,8 @@ define i32 @shl_nsw_nuw_add_nsw_nuw(i32 %NBits) {
 
 define <2 x i32> @shl_add_vec(<2 x i32> %NBits) {
 ; CHECK-LABEL: @shl_add_vec(
-; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <2 x i32> <i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <2 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = xor <2 x i32> [[NOTMASK]], splat (i32 -1)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
   %setbit = shl <2 x i32> <i32 1, i32 1>, %NBits
@@ -198,8 +198,8 @@ define <2 x i32> @shl_add_vec(<2 x i32> %NBits) {
 
 define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) {
 ; CHECK-LABEL: @shl_add_vec_poison0(
-; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %setbit = shl <3 x i32> <i32 1, i32 poison, i32 1>, %NBits
@@ -209,8 +209,8 @@ define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) {
 
 define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) {
 ; CHECK-LABEL: @shl_add_vec_poison1(
-; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %setbit = shl <3 x i32> <i32 1, i32 1, i32 1>, %NBits
@@ -220,8 +220,8 @@ define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) {
 
 define <3 x i32> @shl_add_vec_poison2(<3 x i32> %NBits) {
 ; CHECK-LABEL: @shl_add_vec_poison2(
-; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTMASK:%.*]] = shl nsw <3 x i32> splat (i32 -1), [[NBITS:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], splat (i32 -1)
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
   %setbit = shl <3 x i32> <i32 1, i32 poison, i32 1>, %NBits
diff --git a/llvm/test/Transforms/InstCombine/set.ll b/llvm/test/Transforms/InstCombine/set.ll
index f44ac83f7f5916..f63bb7eab28ce1 100644
--- a/llvm/test/Transforms/InstCombine/set.ll
+++ b/llvm/test/Transforms/InstCombine/set.ll
@@ -257,7 +257,7 @@ define i1 @xor_of_icmps_neg_to_ne(i64 %a) {
 
 define <2 x i1> @xor_of_icmps_to_ne_vector(<2 x i64> %a) {
 ; CHECK-LABEL: @xor_of_icmps_to_ne_vector(
-; CHECK-NEXT:    [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], <i64 5, i64 5>
+; CHECK-NEXT:    [[XOR:%.*]] = icmp ne <2 x i64> [[A:%.*]], splat (i64 5)
 ; CHECK-NEXT:    ret <2 x i1> [[XOR]]
 ;
   %b = icmp sgt <2 x i64> %a, <i64 4, i64 4>
@@ -419,7 +419,7 @@ define i32 @test20(i32 %A) {
 
 define <2 x i32> @test20vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test20vec(
-; CHECK-NEXT:    [[D:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[D:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
   %B = and <2 x i32> %A, <i32 1, i32 1>
@@ -442,8 +442,8 @@ define i32 @test21(i32 %a) {
 
 define <2 x i32> @test21vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test21vec(
-; CHECK-NEXT:    [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP_6:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[TMP_6_LOBIT:%.*]] = and <2 x i32> [[TMP_6]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP_6_LOBIT]]
 ;
   %tmp.6 = and <2 x i32> %a, <i32 4, i32 4>
@@ -490,8 +490,8 @@ define i32 @test23(i32 %a) {
 
 define <2 x i32> @test23vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test23vec(
-; CHECK-NEXT:    [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP_1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[TMP_3:%.*]] = xor <2 x i32> [[TMP_1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP_3]]
 ;
   %tmp.1 = and <2 x i32> %a, <i32 1, i32 1>
@@ -516,9 +516,9 @@ define i32 @test24(i32 %a) {
 
 define <2 x i32> @test24vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test24vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[DOTLOBIT:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1)
+; CHECK-NEXT:    [[TMP_3:%.*]] = xor <2 x i32> [[DOTLOBIT]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP_3]]
 ;
   %tmp1 = and <2 x i32> %a, <i32 4, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
index b992460d0be698..516f1a2bfd5c53 100644
--- a/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
+++ b/llvm/test/Transforms/InstCombine/sext-of-trunc-nsw.ll
@@ -55,7 +55,7 @@ define i16 @n2(i8 %x) {
 
 define <2 x i16> @t3_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @t3_vec(
-; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[A:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 4)
 ; CHECK-NEXT:    call void @usevec(<2 x i8> [[A]])
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i8> [[A]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[C]]
diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll
index a554f2b28d6f29..ee3c52259f930a 100644
--- a/llvm/test/Transforms/InstCombine/sext.ll
+++ b/llvm/test/Transforms/InstCombine/sext.ll
@@ -143,8 +143,8 @@ define i32 @test10(i32 %i) {
 
 define <2 x i32> @test10_vec(<2 x i32> %i) {
 ; CHECK-LABEL: @test10_vec(
-; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 30>
-; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 30>
+; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i32> [[I:%.*]], splat (i32 30)
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i32> [[D1]], splat (i32 30)
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
   %A = trunc <2 x i32> %i to <2 x i8>
@@ -338,8 +338,8 @@ define <2 x i32> @smear_set_bit_vec_use1(<2 x i32> %x) {
 ; CHECK-LABEL: @smear_set_bit_vec_use1(
 ; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i5>
 ; CHECK-NEXT:    call void @use_vec(<2 x i5> [[T]])
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X]], <i32 27, i32 27>
-; CHECK-NEXT:    [[S:%.*]] = ashr <2 x i32> [[TMP1]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X]], splat (i32 27)
+; CHECK-NEXT:    [[S:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[S]]
 ;
   %t = trunc <2 x i32> %x to <2 x i5>
diff --git a/llvm/test/Transforms/InstCombine/shift-add.ll b/llvm/test/Transforms/InstCombine/shift-add.ll
index 016f877a9efb51..6c6bb8d6b9bc3a 100644
--- a/llvm/test/Transforms/InstCombine/shift-add.ll
+++ b/llvm/test/Transforms/InstCombine/shift-add.ll
@@ -410,9 +410,9 @@ define i32 @shl_nsw_add_negative(i32 %x) {
 
 define <2 x i8> @shl_nuw_add_negative_splat_uses(<2 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @shl_nuw_add_negative_splat_uses(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -2, i8 -2>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 -2)
 ; CHECK-NEXT:    store <2 x i8> [[A]], ptr [[P:%.*]], align 2
-; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i8> <i8 3, i8 3>, [[X]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i8> splat (i8 3), [[X]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = add <2 x i8> %x, <i8 -2, i8 -2>
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
index c4260f4cb2bf88..c134833630338c 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-lshr.ll
@@ -165,7 +165,7 @@ define i1 @n4(i32 %x, i32 %len) {
 ; New shift amount would be 16, minimal count of leading zeros in %x is 16. Ok.
 define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t5_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 16)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 65535, i64 32767>
 ; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T5]]
@@ -183,9 +183,9 @@ define <2 x i1> @t5_vec(<2 x i64> %y, <2 x i32> %len) {
 ; New shift amount would be 16, minimal count of leading zeros in %x is 15, not ok to fold.
 define <2 x i1> @n6_vec(<2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @n6_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[LEN:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> <i32 65535, i32 131071>, [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16)
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64>
 ; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> [[Y:%.*]], [[T2_WIDE]]
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc <2 x i64> [[T3]] to <2 x i32>
@@ -224,9 +224,9 @@ define <2 x i1> @t7_vec(<2 x i32> %x, <2 x i32> %len) {
 ; New shift amount would be 16, minimal count of leading zeros in %x is 48, not ok to fold.
 define <2 x i1> @n8_vec(<2 x i32> %x, <2 x i32> %len) {
 ; CHECK-LABEL: @n8_vec(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 32, i32 32>, [[LEN:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> splat (i32 32), [[LEN:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
-; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -16, i32 -16>
+; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], splat (i32 -16)
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64>
 ; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i64> <i64 131071, i64 262143>, [[T2_WIDE]]
 ; CHECK-NEXT:    [[T3_TRUNC:%.*]] = trunc nuw nsw <2 x i64> [[T3]] to <2 x i32>
@@ -335,7 +335,7 @@ define i1 @t10_shift_by_one(i32 %x, i64 %y, i32 %len) {
 ; A mix of those conditions is ok.
 define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t11_zero_and_almost_bitwidth(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 64, i32 64>, [[LEN:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
 ; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -1, i32 -64>
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64>
@@ -357,7 +357,7 @@ define <2 x i1> @t11_zero_and_almost_bitwidth(<2 x i32> %x, <2 x i64> %y, <2 x i
 }
 define <2 x i1> @n12_bad(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @n12_bad(
-; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> <i32 64, i32 64>, [[LEN:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = sub <2 x i32> splat (i32 64), [[LEN:%.*]]
 ; CHECK-NEXT:    [[T1:%.*]] = shl <2 x i32> [[X:%.*]], [[T0]]
 ; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[LEN]], <i32 -2, i32 -64>
 ; CHECK-NEXT:    [[T2_WIDE:%.*]] = zext nneg <2 x i32> [[T2]] to <2 x i64>
@@ -414,7 +414,7 @@ define i1 @t14_x_is_one(i32 %x, i32 %len) {
 
 define <2 x i1> @t15_vec_x_is_one_or_zero(<2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t15_vec_x_is_one_or_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], <i64 48, i64 48>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[Y:%.*]], splat (i64 48)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 1, i64 0>
 ; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T5]]
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
index 6e9552e2af4cce..6f4e78e9f91a0f 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest-with-truncation-shl.ll
@@ -38,7 +38,7 @@ define i1 @t0_const_after_fold_lshr_shl_ne(i32 %x, i64 %y, i32 %len) {
 
 define <2 x i1> @t1_vec_splat(<2 x i32> %x, <2 x i64> %y, <2 x i32> %len) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    [[T5:%.*]] = icmp ne <2 x i64> [[TMP3]], zeroinitializer
@@ -363,7 +363,7 @@ define i1 @t10_constants(i32 %x, i64 %y) {
 define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @t11_constants_vec_splat(
 ; CHECK-NEXT:    [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 26, i32 26>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 26)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T3]]
@@ -378,7 +378,7 @@ define <2 x i1> @t11_constants_vec_splat(<2 x i32> %x, <2 x i64> %y) {
 define <2 x i1> @t12_constants_vec_nonsplat(<2 x i32> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @t12_constants_vec_nonsplat(
 ; CHECK-NEXT:    [[Y_TR:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 28, i32 28>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 28)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y_TR]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T3]]
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
index 00a19e4962e6ca..070a3b03302124 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll
@@ -119,7 +119,7 @@ define i1 @t6_const_shl_lshr_ne(i32 %x, i32 %y, i32 %shamt0, i32 %shamt1) {
 
 define <2 x i1> @t7_const_lshr_shl_ne_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t7_const_lshr_shl_ne_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[T3]]
@@ -171,7 +171,7 @@ define <3 x i1> @t10_const_lshr_shl_ne_vec_poison1(<3 x i32> %x, <3 x i32> %y) {
 }
 define <3 x i1> @t11_const_lshr_shl_ne_vec_poison2(<3 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_poison2(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]]
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    ret <3 x i1> [[T3]]
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll
index 9efc30cc9d916e..78ac4cbabde7d9 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll
@@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[T5:%.*]] = trunc nsw <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll
index c31b6ed3ea2ba9..937345b00c0339 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll
@@ -29,7 +29,7 @@ define i16 @t0(i32 %x, i16 %y) {
 
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[T5:%.*]] = trunc nuw nsw <2 x i32> [[TMP1]] to <2 x i16>
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
index 073013b34a3baa..0491ce1def8488 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll
@@ -30,7 +30,7 @@ define i16 @t0(i32 %x, i16 %y) {
 define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
 ; CHECK-NEXT:    [[X_TR:%.*]] = trunc <2 x i32> [[X:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[X_TR]], <i16 8, i16 8>
+; CHECK-NEXT:    [[T5:%.*]] = shl <2 x i16> [[X_TR]], splat (i16 8)
 ; CHECK-NEXT:    ret <2 x i16> [[T5]]
 ;
   %t0 = sub <2 x i16> <i16 32, i16 32>, %y
diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll
index 6bbe4c5151e458..b4c606f037d561 100644
--- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll
+++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll
@@ -24,7 +24,7 @@ define i32 @t0(i32 %x, i32 %y) {
 
 define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t1_vec_splat(
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t0 = sub <2 x i32> <i32 32, i32 32>, %y
@@ -36,7 +36,7 @@ define <2 x i32> @t1_vec_splat(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @t2_vec_nonsplat(
-; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 30, i32 30>
+; CHECK-NEXT:    [[T3:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 30)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t0 = sub <2 x i32> <i32 32, i32 30>, %y
diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll
index 593a22bec6490b..ab8d98a9523ba8 100644
--- a/llvm/test/Transforms/InstCombine/shift-logic.ll
+++ b/llvm/test/Transforms/InstCombine/shift-logic.ll
@@ -46,7 +46,7 @@ define i16 @shl_or(i16 %x, i16 %py) {
 
 define <2 x i16> @shl_or_poison(<2 x i16> %x, <2 x i16> %py) {
 ; CHECK-LABEL: @shl_or_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], <i16 42, i16 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], splat (i16 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i16> [[X:%.*]], <i16 12, i16 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <2 x i16> [[Y]], <i16 7, i16 poison>
 ; CHECK-NEXT:    [[SH1:%.*]] = or <2 x i16> [[TMP1]], [[TMP2]]
@@ -102,7 +102,7 @@ define i64 @lshr_and(i64 %x, i64 %py) {
 
 define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_and_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 12, i64 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[Y]], <i64 7, i64 poison>
 ; CHECK-NEXT:    [[SH1:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]]
@@ -117,8 +117,8 @@ define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) {
 
 define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @lshr_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], <i32 12, i32 12, i32 12, i32 12>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[X:%.*]], splat (i32 12)
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[Y:%.*]], splat (i32 7)
 ; CHECK-NEXT:    [[SH1:%.*]] = or <4 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i32> [[SH1]]
 ;
@@ -130,9 +130,9 @@ define <4 x i32> @lshr_or(<4 x i32> %x, <4 x i32> %y) {
 
 define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) {
 ; CHECK-LABEL: @lshr_xor(
-; CHECK-NEXT:    [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], <i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12, i16 12>
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[Y]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:    [[Y:%.*]] = srem <8 x i16> [[PY:%.*]], splat (i16 42)
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> [[X:%.*]], splat (i16 12)
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[Y]], splat (i16 7)
 ; CHECK-NEXT:    [[SH1:%.*]] = xor <8 x i16> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <8 x i16> [[SH1]]
 ;
@@ -146,8 +146,8 @@ define <8 x i16> @lshr_xor(<8 x i16> %x, <8 x i16> %py) {
 define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) {
 ; CHECK-LABEL: @ashr_and(
 ; CHECK-NEXT:    [[Y:%.*]] = srem <16 x i8> [[PY:%.*]], [[PZ:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i8> [[Y]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i8> [[X:%.*]], splat (i8 5)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <16 x i8> [[Y]], splat (i8 2)
 ; CHECK-NEXT:    [[SH1:%.*]] = and <16 x i8> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <16 x i8> [[SH1]]
 ;
@@ -160,8 +160,8 @@ define <16 x i8> @ashr_and(<16 x i8> %x, <16 x i8> %py, <16 x i8> %pz) {
 
 define <2 x i64> @ashr_or(<2 x i64> %x, <2 x i64> %y) {
 ; CHECK-LABEL: @ashr_or(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], <i64 12, i64 12>
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], <i64 7, i64 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[X:%.*]], splat (i64 12)
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <2 x i64> [[Y:%.*]], splat (i64 7)
 ; CHECK-NEXT:    [[SH1:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i64> [[SH1]]
 ;
@@ -281,7 +281,7 @@ define i64 @lshr_mul_nuw_nsw(i64 %0) {
 
 define <4 x i32> @lshr_mul_vector(<4 x i32> %0) {
 ; CHECK-LABEL: @lshr_mul_vector(
-; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], <i32 13, i32 13, i32 13, i32 13>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[TMP0:%.*]], splat (i32 13)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %2 = mul nuw <4 x i32> %0, <i32 52, i32 52, i32 52, i32 52>
@@ -394,7 +394,7 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i64> @shl_add_poison(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_add_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], <i64 7, i64 poison>
 ; CHECK-NEXT:    [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
@@ -436,7 +436,7 @@ define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i64> @lshr_add_poison(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_add_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42)
 ; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 poison>
 ; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i64> [[Y]], [[SH0]]
 ; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 poison>
@@ -492,7 +492,7 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i64> @shl_sub_poison(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @shl_sub_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 poison>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], <i64 7, i64 poison>
 ; CHECK-NEXT:    [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]]
@@ -534,7 +534,7 @@ define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) {
 
 define <2 x i64> @lshr_sub_poison(<2 x i64> %x, <2 x i64> %py) {
 ; CHECK-LABEL: @lshr_sub_poison(
-; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42>
+; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], splat (i64 42)
 ; CHECK-NEXT:    [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 poison>
 ; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i64> [[Y]], [[SH0]]
 ; CHECK-NEXT:    [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 poison>
diff --git a/llvm/test/Transforms/InstCombine/shift-shift.ll b/llvm/test/Transforms/InstCombine/shift-shift.ll
index 7c35718601ba7f..fa58de3528d37b 100644
--- a/llvm/test/Transforms/InstCombine/shift-shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift-shift.ll
@@ -486,7 +486,7 @@ define i8 @shl_lshr_demand4(i8 %x) {
 
 define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> <i8 37, i8 37>, [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> splat (i8 37), [[X:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[TMP1]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
@@ -501,7 +501,7 @@ define <2 x i6> @shl_lshr_demand5(<2 x i8> %x) {
 define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_left(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 undef, i8 -108>, [[X:%.*]]
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 2, i8 2>
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], splat (i8 2)
 ; CHECK-NEXT:    [[R:%.*]] = trunc nuw <2 x i8> [[LSHR]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
 ;
@@ -515,7 +515,7 @@ define <2 x i6> @shl_lshr_demand5_undef_left(<2 x i8> %x) {
 
 define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_undef_right(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]]
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 undef, i8 2>
 ; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
@@ -530,7 +530,7 @@ define <2 x i6> @shl_lshr_demand5_undef_right(<2 x i8> %x) {
 
 define <2 x i6> @shl_lshr_demand5_nonuniform_vec_left(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_lshr_demand5_nonuniform_vec_left(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> <i8 -108, i8 -108>, [[X:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> splat (i8 -108), [[X:%.*]]
 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr <2 x i8> [[SHL]], <i8 1, i8 2>
 ; CHECK-NEXT:    [[R:%.*]] = trunc <2 x i8> [[LSHR]] to <2 x i6>
 ; CHECK-NEXT:    ret <2 x i6> [[R]]
@@ -649,8 +649,8 @@ define i8 @lshr_shl_demand4(i8 %x) {
 
 define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_shl_demand5(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> <i8 -76, i8 -76>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 108, i8 108>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> splat (i8 -76), [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
@@ -663,9 +663,9 @@ define <2 x i8> @lshr_shl_demand5(<2 x i8> %x) {
 
 define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_shl_demand5_undef_left(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> <i8 45, i8 45>, [[X:%.*]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 undef, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], <i8 108, i8 108>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
@@ -679,8 +679,8 @@ define <2 x i8> @lshr_shl_demand5_undef_left(<2 x i8> %x) {
 define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_shl_demand5_undef_right(
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> <i8 undef, i8 45>, [[X:%.*]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 2, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], <i8 108, i8 108>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], splat (i8 2)
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %shr = lshr <2 x i8> <i8 undef, i8 45>, %x ; 0b0010_1101
@@ -693,9 +693,9 @@ define <2 x i8> @lshr_shl_demand5_undef_right(<2 x i8> %x) {
 
 define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_left(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> <i8 45, i8 45>, [[X:%.*]]
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> splat (i8 45), [[X:%.*]]
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[SHR]], <i8 1, i8 2>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], <i8 108, i8 108>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[SHL]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %shr = lshr <2 x i8> <i8 45, i8 45>, %x ; 0b0010_1101
@@ -709,7 +709,7 @@ define <2 x i8> @lshr_shl_demand5_nonuniform_vec_left(<2 x i8> %x) {
 define <2 x i8> @lshr_shl_demand5_nonuniform_vec_right(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_shl_demand5_nonuniform_vec_right(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> <i8 -76, i8 52>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], <i8 108, i8 108>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i8> [[TMP1]], splat (i8 108)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %shr = lshr <2 x i8> <i8 45, i8 13>, %x ; 0b0010_1101. 0b0000_1101
diff --git a/llvm/test/Transforms/InstCombine/shift-sra.ll b/llvm/test/Transforms/InstCombine/shift-sra.ll
index 82bce5f02554ee..d631da0d444adb 100644
--- a/llvm/test/Transforms/InstCombine/shift-sra.ll
+++ b/llvm/test/Transforms/InstCombine/shift-sra.ll
@@ -143,7 +143,7 @@ define i32 @ashr_overshift(i32 %x) {
 
 define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @ashr_ashr_splat_vec(
-; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 12, i32 12>
+; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 12)
 ; CHECK-NEXT:    ret <2 x i32> [[SH2]]
 ;
   %sh1 = ashr <2 x i32> %x, <i32 5, i32 5>
@@ -155,7 +155,7 @@ define <2 x i32> @ashr_ashr_splat_vec(<2 x i32> %x) {
 
 define <2 x i32> @ashr_overshift_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @ashr_overshift_splat_vec(
-; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[SH2:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[SH2]]
 ;
   %sh1 = ashr <2 x i32> %x, <i32 15, i32 15>
@@ -180,7 +180,7 @@ define i32 @hoist_ashr_ahead_of_sext_1(i8 %x) {
 
 define <2 x i32> @hoist_ashr_ahead_of_sext_1_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_1_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -206,7 +206,7 @@ define i32 @hoist_ashr_ahead_of_sext_2(i8 %x) {
 
 define <2 x i32> @hoist_ashr_ahead_of_sext_2_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @hoist_ashr_ahead_of_sext_2_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 558f4ffbfcabe4..60fdec4a8505a9 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -238,8 +238,8 @@ define i1 @test17(i32 %A) {
 
 define <2 x i1> @test17vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test17vec(
-; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], <i32 -8, i32 -8>
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], <i32 9872, i32 9872>
+; CHECK-NEXT:    [[B_MASK:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -8)
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i32> [[B_MASK]], splat (i32 9872)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = lshr <2 x i32> %A, <i32 3, i32 3>
@@ -270,7 +270,7 @@ define i1 @test19(i32 %A) {
 
 define <2 x i1> @test19vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test19vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[A:%.*]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i32> %A, <i32 2, i32 2>
@@ -291,7 +291,7 @@ define i1 @test19a(i32 %A) {
 
 define <2 x i1> @test19a_vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test19a_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], <i32 -5, i32 -5>
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[A:%.*]], splat (i32 -5)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = ashr <2 x i32> %A, <i32 2, i32 2>
@@ -436,7 +436,7 @@ entry:
 define <2 x i32> @test29_uniform(<2 x i64> %d18) {
 ; CHECK-LABEL: @test29_uniform(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], <i64 63, i64 63>
+; CHECK-NEXT:    [[SUM_SHIFT:%.*]] = lshr <2 x i64> [[D18:%.*]], splat (i64 63)
 ; CHECK-NEXT:    [[I101:%.*]] = trunc nuw nsw <2 x i64> [[SUM_SHIFT]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[I101]]
 ;
@@ -526,7 +526,7 @@ define i1 @test33(i32 %X) {
 
 define <2 x i1> @test33vec(<2 x i32> %X) {
 ; CHECK-LABEL: @test33vec(
-; CHECK-NEXT:    [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 16777216, i32 16777216>
+; CHECK-NEXT:    [[I1_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 16777216)
 ; CHECK-NEXT:    [[I2:%.*]] = icmp ne <2 x i32> [[I1_MASK]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[I2]]
 ;
@@ -597,7 +597,7 @@ define i64 @test37(i128 %A, i32 %B) {
 define <2 x i32> @shl_nuw_nsw_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_nuw_nsw_splat_vec(
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], <i32 17, i32 17>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw <2 x i32> [[T2]], splat (i32 17)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t2 = zext <2 x i8> %x to <2 x i32>
@@ -618,8 +618,8 @@ define i32 @test38(i32 %x) {
 
 define <2 x i32> @test38_uniform(<2 x i32> %x) {
 ; CHECK-LABEL: @test38_uniform(
-; CHECK-NEXT:    [[REM1:%.*]] = and <2 x i32> [[X:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i32> <i32 1, i32 1>, [[REM1]]
+; CHECK-NEXT:    [[REM1:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31)
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <2 x i32> splat (i32 1), [[REM1]]
 ; CHECK-NEXT:    ret <2 x i32> [[SHL]]
 ;
   %rem = srem <2 x i32> %x, <i32 32, i32 32>
@@ -630,7 +630,7 @@ define <2 x i32> @test38_uniform(<2 x i32> %x) {
 define <3 x i32> @test38_nonuniform(<3 x i32> %x) {
 ; CHECK-LABEL: @test38_nonuniform(
 ; CHECK-NEXT:    [[REM1:%.*]] = and <3 x i32> [[X:%.*]], <i32 31, i32 15, i32 0>
-; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <3 x i32> <i32 1, i32 1, i32 1>, [[REM1]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl nuw <3 x i32> splat (i32 1), [[REM1]]
 ; CHECK-NEXT:    ret <3 x i32> [[SHL]]
 ;
   %rem = srem <3 x i32> %x, <i32 32, i32 16, i32 1>
@@ -688,7 +688,7 @@ define i32 @test42(i32 %a, i32 %b) {
 
 define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @test42vec(
-; CHECK-NEXT:    [[DIV:%.*]] = lshr exact <2 x i32> <i32 4096, i32 4096>, [[B:%.*]]
+; CHECK-NEXT:    [[DIV:%.*]] = lshr exact <2 x i32> splat (i32 4096), [[B:%.*]]
 ; CHECK-NEXT:    [[DIV2:%.*]] = udiv <2 x i32> [[A:%.*]], [[DIV]]
 ; CHECK-NEXT:    ret <2 x i32> [[DIV2]]
 ;
@@ -770,7 +770,7 @@ define i32 @test46(i32 %a) {
 
 define <2 x i32> @test46_splat_vec(<2 x i32> %a) {
 ; CHECK-LABEL: @test46_splat_vec(
-; CHECK-NEXT:    [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[Z:%.*]] = ashr exact <2 x i32> [[A:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[Z]]
 ;
   %y = ashr exact <2 x i32> %a, <i32 3, i32 3>
@@ -794,7 +794,7 @@ define i8 @test47(i8 %a) {
 
 define <2 x i8> @test47_splat_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @test47_splat_vec(
-; CHECK-NEXT:    [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], <i8 2, i8 2>
+; CHECK-NEXT:    [[Z:%.*]] = lshr exact <2 x i8> [[A:%.*]], splat (i8 2)
 ; CHECK-NEXT:    ret <2 x i8> [[Z]]
 ;
   %y = lshr exact <2 x i8> %a, <i8 3, i8 3>
@@ -830,7 +830,7 @@ define i32 @test48_nuw_nsw(i32 %x) {
 
 define <2 x i32> @test48_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test48_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = lshr exact <2 x i32> %x, <i32 1, i32 1>
@@ -866,7 +866,7 @@ define i32 @test49_nuw_nsw(i32 %x) {
 
 define <2 x i32> @test49_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test49_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = ashr exact <2 x i32> %x, <i32 1, i32 1>
@@ -891,7 +891,7 @@ define i32 @test50(i32 %x) {
 
 define <2 x i32> @test50_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test50_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = shl nsw <2 x i32> %x, <i32 1, i32 1>
@@ -916,7 +916,7 @@ define i32 @test51(i32 %x) {
 
 define <2 x i32> @test51_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test51_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = lshr exact <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = shl nuw <2 x i32> %x, <i32 1, i32 1>
@@ -942,8 +942,8 @@ define i32 @test51_no_nuw(i32 %x) {
 
 define <2 x i32> @test51_no_nuw_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test51_no_nuw_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 2, i32 2>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[TMP1]], <i32 536870911, i32 536870911>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 2)
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i32> [[TMP1]], splat (i32 536870911)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = shl <2 x i32> %x, <i32 1, i32 1>
@@ -967,7 +967,7 @@ define i32 @test52(i32 %x) {
 
 define <2 x i32> @test52_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test52_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = shl nsw <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = shl nsw <2 x i32> %x, <i32 3, i32 3>
@@ -991,7 +991,7 @@ define i32 @test53(i32 %x) {
 
 define <2 x i32> @test53_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test53_splat_vec(
-; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], <i32 2, i32 2>
+; CHECK-NEXT:    [[B:%.*]] = shl nuw nsw <2 x i32> [[X:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %A = shl nuw <2 x i32> %x, <i32 3, i32 3>
@@ -1016,8 +1016,8 @@ define i8 @test53_no_nuw(i8 %x) {
 
 define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @test53_no_nuw_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 2, i8 2>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], <i8 124, i8 124>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 2)
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i8> [[TMP1]], splat (i8 124)
 ; CHECK-NEXT:    ret <2 x i8> [[B]]
 ;
   %A = shl <2 x i8> %x, <i8 3, i8 3>
@@ -1039,8 +1039,8 @@ define i32 @test54(i32 %x) {
 
 define <2 x i32> @test54_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test54_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[TMP1]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[AND]]
 ;
   %shr2 = lshr <2 x i32> %x, <i32 1, i32 1>
@@ -1101,8 +1101,8 @@ define i32 @test58(i32 %x) {
 
 define <2 x i32> @test58_splat_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test58_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[OR]]
 ;
   %shr = ashr <2 x i32> %x, <i32 4, i32 4>
@@ -1238,7 +1238,7 @@ define i32 @test_shl_zext_bool(i1 %t) {
 
 define <2 x i32> @test_shl_zext_bool_splat(<2 x i1> %t) {
 ; CHECK-LABEL: @test_shl_zext_bool_splat(
-; CHECK-NEXT:    [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> <i32 8, i32 8>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[SHL:%.*]] = select <2 x i1> [[T:%.*]], <2 x i32> splat (i32 8), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[SHL]]
 ;
   %ext = zext <2 x i1> %t to <2 x i32>
@@ -1296,7 +1296,7 @@ define i64 @shl_zext_extra_use(i32 %t) {
 
 define <2 x i64> @shl_zext_splat_vec(<2 x i32> %t) {
 ; CHECK-LABEL: @shl_zext_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], <i32 8, i32 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[T:%.*]], splat (i32 8)
 ; CHECK-NEXT:    [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[SHL]]
 ;
@@ -1377,7 +1377,7 @@ define i64 @shl_zext_mul_extra_use2(i32 %t) {
 
 define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @ashr_demanded_bits_splat(
-; CHECK-NEXT:    [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[SHR:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i8> [[SHR]]
 ;
   %and = and <2 x i8> %x, <i8 128, i8 128>
@@ -1397,7 +1397,7 @@ define <vscale x 8 x i8> @ashr_demanded_bits_splat2(<vscale x 8 x i8> %x) {
 
 define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_demanded_bits_splat(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i8> [[SHR]]
 ;
   %and = and <2 x i8> %x, <i8 128, i8 128>
@@ -1913,7 +1913,7 @@ define i64 @lshr_mul_negpow2_2(i64 %x) {
 define <2 x i32> @lshr_mul_negpow2_3(<2 x i32> %x) {
 ; CHECK-LABEL: @lshr_mul_negpow2_3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[TMP1]], <i32 255, i32 255>
+; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 255)
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
   %a = mul <2 x i32> %x, <i32 -16777216, i32 -16777216>
@@ -1937,8 +1937,8 @@ define i32 @lshr_mul_negpow2_4(i32 %x) {
 define <2 x i32> @lshr_mul_negpow2_5(<2 x i32> %x) {
 ; CHECK-LABEL: @lshr_mul_negpow2_5(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[TMP1]], <i32 65527, i32 65527>
-; CHECK-NEXT:    [[B:%.*]] = or disjoint <2 x i32> [[A]], <i32 8, i32 8>
+; CHECK-NEXT:    [[A:%.*]] = and <2 x i32> [[TMP1]], splat (i32 65527)
+; CHECK-NEXT:    [[B:%.*]] = or disjoint <2 x i32> [[A]], splat (i32 8)
 ; CHECK-NEXT:    ret <2 x i32> [[B]]
 ;
   %a = mul <2 x i32> %x, <i32 -65536, i32 -65536>
@@ -1973,7 +1973,7 @@ define i8 @ashr_sdiv_pos(i8 %x) {
 
 define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 41, i8 41>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 41)
 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1984,7 +1984,7 @@ define <2 x i8> @ashr_sdiv_neg_splat_vec(<2 x i8> %x) {
 
 define <2 x i8> @ashr_sdiv_neg_splat_vec_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @ashr_sdiv_neg_splat_vec_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], splat (i8 127)
 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -2119,7 +2119,7 @@ define i6 @shl_or7_eq_shl7(i6 %x, i6 %c) {
 
 define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) {
 ; CHECK-LABEL: @lshr_vec_or7_eq_shl7(
-; CHECK-NEXT:    [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[Y:%.*]] = lshr exact <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i8> [[Y]]
 ;
   %amt = or <2 x i8> %c, <i8 7, i8 7>
@@ -2130,7 +2130,7 @@ define <2 x i8> @lshr_vec_or7_eq_shl7(<2 x i8> %x, <2 x i8> %c) {
 
 define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) {
 ; CHECK-LABEL: @ashr_vec_or7_eq_ashr7(
-; CHECK-NEXT:    [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7>
+; CHECK-NEXT:    [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], splat (i8 7)
 ; CHECK-NEXT:    ret <2 x i8> [[Y]]
 ;
   %amt = or <2 x i8> %c, <i8 7, i8 7>
@@ -2141,7 +2141,7 @@ define <2 x i8> @ashr_vec_or7_eq_ashr7(<2 x i8> %x, <2 x i8> %c) {
 ; Negative test not bitwidth - 1
 define <2 x i8> @ashr_vec_or6_fail(<2 x i8> %x, <2 x i8> %c) {
 ; CHECK-LABEL: @ashr_vec_or6_fail(
-; CHECK-NEXT:    [[AMT:%.*]] = or <2 x i8> [[C:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[AMT:%.*]] = or <2 x i8> [[C:%.*]], splat (i8 6)
 ; CHECK-NEXT:    [[Y:%.*]] = ashr <2 x i8> [[X:%.*]], [[AMT]]
 ; CHECK-NEXT:    ret <2 x i8> [[Y]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
index daa49557965943..197b9c80658e60 100644
--- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll
@@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_negC_ne(i32 %x, i32 %y) {
 define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_negC_eq(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_poison1(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -96,7 +96,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_poison2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -108,7 +108,7 @@ define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_shl_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_shl_and_negC_eq_poison3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[R:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 8)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
index dcc181945357da..27542dad2fe37c 100644
--- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll
@@ -72,7 +72,7 @@ define i1 @scalar_i32_shl_and_signbit_ne(i32 %x, i32 %y) {
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -84,7 +84,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison1(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -96,7 +96,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison2(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
@@ -108,7 +108,7 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y
 define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison3(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], splat (i32 -1)
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
 ;
   %shl = shl <4 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/shl-bo.ll b/llvm/test/Transforms/InstCombine/shl-bo.ll
index 356c4a288f9e31..c32ac2eacb25a1 100644
--- a/llvm/test/Transforms/InstCombine/shl-bo.ll
+++ b/llvm/test/Transforms/InstCombine/shl-bo.ll
@@ -20,10 +20,10 @@ define i8 @lshr_add(i8 %a, i8 %y) {
 
 define <2 x i8> @lshr_add_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_add_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 5, i8 5>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 5)
 ; CHECK-NEXT:    [[R2:%.*]] = add <2 x i8> [[Y:%.*]], [[B1]]
-; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], <i8 -32, i8 -32>
+; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -32)
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
   %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
@@ -50,10 +50,10 @@ define i8 @lshr_sub(i8 %a, i8 %y) {
 
 define <2 x i8> @lshr_sub_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_sub_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 3, i8 3>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3)
 ; CHECK-NEXT:    [[R2:%.*]] = sub <2 x i8> [[Y:%.*]], [[B1]]
-; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[R2]], splat (i8 -8)
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
   %x = srem <2 x i8> %a, <i8 42, i8 42> ; thwart complexity-based canonicalization
@@ -79,8 +79,8 @@ define i8 @lshr_and(i8 %a, i8 %y) {
 
 define <2 x i8> @lshr_and_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_and_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 6, i8 6>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 6)
 ; CHECK-NEXT:    [[R2:%.*]] = and <2 x i8> [[Y:%.*]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R2]]
 ;
@@ -108,9 +108,9 @@ define i8 @lshr_or(i8 %a, i8 %y) {
 
 define <2 x i8> @lshr_or_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_or_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 4, i8 4>
-; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], <i8 -16, i8 -16>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 4)
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -16)
 ; CHECK-NEXT:    [[L:%.*]] = or <2 x i8> [[Y_MASKED]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -138,9 +138,9 @@ define i8 @lshr_xor(i8 %a, i8 %y) {
 
 define <2 x i8> @lshr_xor_commute_splat(<2 x i8> %a, <2 x i8> %y) {
 ; CHECK-LABEL: @lshr_xor_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 3, i8 3>
-; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], <i8 -8, i8 -8>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3)
+; CHECK-NEXT:    [[Y_MASKED:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 -8)
 ; CHECK-NEXT:    [[L:%.*]] = xor <2 x i8> [[Y_MASKED]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -199,9 +199,9 @@ define i8 @lshr_and_add(i8 %a, i8 %y)  {
 
 define <2 x i8> @lshr_and_add_commute_splat(<2 x i8> %a, <2 x i8> %y)  {
 ; CHECK-LABEL: @lshr_and_add_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 3, i8 3>
-; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 96, i8 96>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 3)
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 96)
 ; CHECK-NEXT:    [[L:%.*]] = add <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -232,9 +232,9 @@ define i8 @lshr_and_sub(i8 %a, i8 %y)  {
 
 define <2 x i8> @lshr_and_sub_commute_splat(<2 x i8> %a, <2 x i8> %y)  {
 ; CHECK-LABEL: @lshr_and_sub_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
-; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2)
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52)
 ; CHECK-NEXT:    [[L:%.*]] = sub <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -264,9 +264,9 @@ define i8 @lshr_and_and(i8 %a, i8 %y)  {
 
 define <2 x i8> @lshr_and_and_commute_splat(<2 x i8> %a, <2 x i8> %y)  {
 ; CHECK-LABEL: @lshr_and_and_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
-; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2)
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52)
 ; CHECK-NEXT:    [[L:%.*]] = and <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -329,9 +329,9 @@ define i8 @ashr_and_or_disjoint(i8 %a, i8 %y)  {
 
 define <2 x i8> @lshr_and_or_commute_splat(<2 x i8> %a, <2 x i8> %y)  {
 ; CHECK-LABEL: @lshr_and_or_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
-; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2)
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52)
 ; CHECK-NEXT:    [[L:%.*]] = or <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -361,9 +361,9 @@ define i8 @lshr_and_xor(i8 %a, i8 %y)  {
 
 define <2 x i8> @lshr_and_xor_commute_splat(<2 x i8> %a, <2 x i8> %y)  {
 ; CHECK-LABEL: @lshr_and_xor_commute_splat(
-; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], <i8 42, i8 42>
-; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], <i8 2, i8 2>
-; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], <i8 52, i8 52>
+; CHECK-NEXT:    [[X:%.*]] = srem <2 x i8> [[A:%.*]], splat (i8 42)
+; CHECK-NEXT:    [[B1:%.*]] = shl <2 x i8> [[X]], splat (i8 2)
+; CHECK-NEXT:    [[Y_MASK:%.*]] = and <2 x i8> [[Y:%.*]], splat (i8 52)
 ; CHECK-NEXT:    [[L:%.*]] = xor <2 x i8> [[Y_MASK]], [[B1]]
 ; CHECK-NEXT:    ret <2 x i8> [[L]]
 ;
@@ -525,8 +525,8 @@ define i32 @lshr_add_and_shl(i32 %x, i32 %y)  {
 
 define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y)  {
 ; CHECK-LABEL: @lshr_add_and_shl_v2i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 5, i32 5>
-; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 4064, i32 4064>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 5)
+; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 4064)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[X_MASK]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
@@ -540,7 +540,7 @@ define <2 x i32> @lshr_add_and_shl_v2i32(<2 x i32> %x, <2 x i32> %y)  {
 define <2 x i32> @lshr_add_and_shl_v2i32_undef(<2 x i32> %x, <2 x i32> %y)  {
 ; CHECK-LABEL: @lshr_add_and_shl_v2i32_undef(
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 undef, i32 5>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 127, i32 127>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], splat (i32 127)
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[Y:%.*]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = shl <2 x i32> [[TMP3]], <i32 5, i32 undef>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP4]]
@@ -583,8 +583,8 @@ define i32 @shl_add_and_lshr(i32 %x, i32 %y) {
 
 define <2 x i32> @shl_add_and_lshr_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @shl_add_and_lshr_v2i32(
-; CHECK-NEXT:    [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[C1:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 4)
+; CHECK-NEXT:    [[X_MASK:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 128)
 ; CHECK-NEXT:    [[D:%.*]] = add <2 x i32> [[X_MASK]], [[C1]]
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
@@ -647,8 +647,8 @@ define <8 x i16> @test_FoldShiftByConstant_CreateSHL2(<8 x i16> %in) {
 
 define <16 x i8> @test_FoldShiftByConstant_CreateAnd(<16 x i8> %in0) {
 ; CHECK-LABEL: @test_FoldShiftByConstant_CreateAnd(
-; CHECK-NEXT:    [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], <i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33, i8 33>
-; CHECK-NEXT:    [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], <i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32, i8 -32>
+; CHECK-NEXT:    [[VSRA_N2:%.*]] = mul <16 x i8> [[IN0:%.*]], splat (i8 33)
+; CHECK-NEXT:    [[VSHL_N:%.*]] = and <16 x i8> [[VSRA_N2]], splat (i8 -32)
 ; CHECK-NEXT:    ret <16 x i8> [[VSHL_N]]
 ;
   %vsra_n = ashr <16 x i8> %in0, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
diff --git a/llvm/test/Transforms/InstCombine/shl-demand.ll b/llvm/test/Transforms/InstCombine/shl-demand.ll
index 08e6e745818489..00b681a9b4254e 100644
--- a/llvm/test/Transforms/InstCombine/shl-demand.ll
+++ b/llvm/test/Transforms/InstCombine/shl-demand.ll
@@ -95,9 +95,9 @@ define i32 @src_srem_shl_demand_max_mask_hit_demand(i32 %a0) {
 
 define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) {
 ; CHECK-LABEL: @src_srem_shl_mask_vector(
-; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 4, i32 4>
-; CHECK-NEXT:    [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], <i32 29, i32 29>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHL]], <i32 -1073741824, i32 -1073741824>
+; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4)
+; CHECK-NEXT:    [[SHL:%.*]] = shl nsw <2 x i32> [[SREM]], splat (i32 29)
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824)
 ; CHECK-NEXT:    ret <2 x i32> [[MASK]]
 ;
   %srem = srem <2 x i32> %a0, <i32 4, i32 4>
@@ -108,9 +108,9 @@ define <2 x i32> @src_srem_shl_mask_vector(<2 x i32> %a0) {
 
 define <2 x i32> @src_srem_shl_mask_vector_nonconstant(<2 x i32> %a0, <2 x i32> %a1) {
 ; CHECK-LABEL: @src_srem_shl_mask_vector_nonconstant(
-; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[SREM:%.*]] = srem <2 x i32> [[A0:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[SREM]], [[A1:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHL]], <i32 -1073741824, i32 -1073741824>
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[SHL]], splat (i32 -1073741824)
 ; CHECK-NEXT:    ret <2 x i32> [[MASK]]
 ;
   %srem = srem <2 x i32> %a0, <i32 4, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/shl-sub.ll b/llvm/test/Transforms/InstCombine/shl-sub.ll
index 962cf0562ae4e4..6cf60a76da9fb1 100644
--- a/llvm/test/Transforms/InstCombine/shl-sub.ll
+++ b/llvm/test/Transforms/InstCombine/shl-sub.ll
@@ -48,7 +48,7 @@ define i64 @shl_sub_i64(i64 %x) {
 
 define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @shl_sub_i64_vec(
-; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i64> <i64 -9223372036854775808, i64 -9223372036854775808>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i64> splat (i64 -9223372036854775808), [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %s = sub <2 x i64> <i64 63, i64 63>, %x
@@ -58,7 +58,7 @@ define <2 x i64> @shl_sub_i64_vec(<2 x i64> %x) {
 
 define <3 x i64> @shl_sub_i64_vec_poison(<3 x i64> %x) {
 ; CHECK-LABEL: @shl_sub_i64_vec_poison(
-; CHECK-NEXT:    [[R:%.*]] = lshr exact <3 x i64> <i64 -9223372036854775808, i64 -9223372036854775808, i64 -9223372036854775808>, [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = lshr exact <3 x i64> splat (i64 -9223372036854775808), [[X:%.*]]
 ; CHECK-NEXT:    ret <3 x i64> [[R]]
 ;
   %s = sub <3 x i64> <i64 63, i64 63, i64 63>, %x
@@ -136,8 +136,8 @@ define i64 @shl_bad_sub_i64(i64 %x) {
 
 define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @shl_bad_sub_i64_vec(
-; CHECK-NEXT:    [[S:%.*]] = sub <2 x i64> <i64 53, i64 53>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i64> <i64 1, i64 1>, [[S]]
+; CHECK-NEXT:    [[S:%.*]] = sub <2 x i64> splat (i64 53), [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <2 x i64> splat (i64 1), [[S]]
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %s = sub <2 x i64> <i64 53, i64 53>, %x
@@ -147,8 +147,8 @@ define <2 x i64> @shl_bad_sub_i64_vec(<2 x i64> %x) {
 
 define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) {
 ; CHECK-LABEL: @bad_shl_sub_i64_vec(
-; CHECK-NEXT:    [[S:%.*]] = sub <2 x i64> <i64 63, i64 63>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i64> <i64 2, i64 2>, [[S]]
+; CHECK-NEXT:    [[S:%.*]] = sub <2 x i64> splat (i64 63), [[X:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i64> splat (i64 2), [[S]]
 ; CHECK-NEXT:    ret <2 x i64> [[R]]
 ;
   %s = sub <2 x i64> <i64 63, i64 63>, %x
@@ -159,7 +159,7 @@ define <2 x i64> @bad_shl_sub_i64_vec(<2 x i64> %x) {
 define <3 x i64> @shl_sub_i64_vec_undef_bad(<3 x i64> %x) {
 ; CHECK-LABEL: @shl_sub_i64_vec_undef_bad(
 ; CHECK-NEXT:    [[S:%.*]] = sub <3 x i64> <i64 63, i64 undef, i64 63>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = shl nuw <3 x i64> <i64 1, i64 1, i64 1>, [[S]]
+; CHECK-NEXT:    [[R:%.*]] = shl nuw <3 x i64> splat (i64 1), [[S]]
 ; CHECK-NEXT:    ret <3 x i64> [[R]]
 ;
   %s = sub <3 x i64> <i64 63, i64 undef, i64 63>, %x
@@ -191,8 +191,8 @@ define i32 @shl_const_op1_sub_const_op0(i32 %x) {
 
 define <2 x i32> @shl_const_op1_sub_const_op0_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @shl_const_op1_sub_const_op0_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], <i32 3, i32 3>
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i32> <i32 336, i32 336>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 3)
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i32> splat (i32 336), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %s = sub <2 x i32> <i32 42, i32 42>, %x
diff --git a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll
index 25b26770c366db..6d6bcb3c46c465 100644
--- a/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll
+++ b/llvm/test/Transforms/InstCombine/shl-unsigned-cmp-const.ll
@@ -116,7 +116,7 @@ define i1 @scalar_i8_shl_ugt_const(i8 %x) {
 
 define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_ult_const(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], <i32 2097088, i32 2097088, i32 2097088, i32 2097088>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -128,7 +128,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const(<4 x i32> %x) {
 define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef1(
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], <i32 11, i32 11, i32 undef, i32 11>
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 131072, i32 131072, i32 131072, i32 131072>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], splat (i32 131072)
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %shl = shl <4 x i32> %x, <i32 11, i32 11, i32 undef, i32 11>
@@ -138,7 +138,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef1(<4 x i32> %x) {
 
 define <4 x i1> @vector_4xi32_shl_ult_const_undef2(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_ult_const_undef2(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], <i32 11, i32 11, i32 11, i32 11>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 11)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 131072, i32 131072, i32 131072, i32 undef>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -161,7 +161,7 @@ define <4 x i1> @vector_4xi32_shl_ult_const_undef3(<4 x i32> %x) {
 ; Check 'uge' predicate
 define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_uge_const(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], <i32 2097088, i32 2097088, i32 2097088, i32 2097088>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -173,7 +173,7 @@ define <4 x i1> @vector_4xi32_shl_uge_const(<4 x i32> %x) {
 ; Check 'ule' predicate
 define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_ule_const(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], <i32 2097088, i32 2097088, i32 2097088, i32 2097088>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
@@ -185,7 +185,7 @@ define <4 x i1> @vector_4xi32_shl_ule_const(<4 x i32> %x) {
 ; Check 'ugt' predicate
 define <4 x i1> @vector_4xi32_shl_ugt_const(<4 x i32> %x) {
 ; CHECK-LABEL: @vector_4xi32_shl_ugt_const(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], <i32 2097088, i32 2097088, i32 2097088, i32 2097088>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], splat (i32 2097088)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
index f573ff36d2cea4..3ed7fc2589d7a9 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
@@ -1392,7 +1392,7 @@ define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @add_or(<4 x i32> %v) {
 ; CHECK-LABEL: @add_or(
-; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
@@ -1407,7 +1407,7 @@ define <4 x i32> @add_or(<4 x i32> %v) {
 
 define <4 x i8> @or_add(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
 ;
@@ -1422,7 +1422,7 @@ define <4 x i8> @or_add(<4 x i8> %v) {
 
 define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add_not_enough_masking(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
 ; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
 ; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -1439,7 +1439,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 
 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 ; CHECK-LABEL: @add_or_2_vars(
-; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
@@ -1453,7 +1453,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 
 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
 ; CHECK-LABEL: @or_add_2_vars(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
index 7516ea5c0a42cf..21765f74dad789 100644
--- a/llvm/test/Transforms/InstCombine/shuffle_select.ll
+++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -1455,7 +1455,7 @@ define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @add_or(<4 x i32> %v) {
 ; CHECK-LABEL: @add_or(
-; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
 ;
@@ -1481,7 +1481,7 @@ define <4 x i32> @add_or_disjoint(<4 x i32> %v) {
 
 define <4 x i8> @or_add(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
 ;
@@ -1496,7 +1496,7 @@ define <4 x i8> @or_add(<4 x i8> %v) {
 
 define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 ; CHECK-LABEL: @or_add_not_enough_masking(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
 ; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
 ; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
@@ -1513,7 +1513,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 
 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 ; CHECK-LABEL: @add_or_2_vars(
-; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
 ; CHECK-NEXT:    ret <4 x i32> [[T3]]
@@ -1527,7 +1527,7 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 
 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
 ; CHECK-LABEL: @or_add_2_vars(
-; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
 ; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
 ; CHECK-NEXT:    ret <4 x i8> [[T3]]
diff --git a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll
index 4d8481dd759ea6..30a4546790293f 100644
--- a/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/signbit-lshr-and-icmpeq-zero.ll
@@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_lshr_and_ne(i32 %x, i32 %y) {
 
 define <4 x i1> @vec_4xi32_signbit_lshr_and_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_signbit_lshr_and_eq(
-; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[Y:%.*]]
+; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <4 x i32> splat (i32 -2147483648), [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[LSHR]], [[X:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll
index c97731777283d1..e43b31ad24fe1e 100644
--- a/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll
+++ b/llvm/test/Transforms/InstCombine/signbit-shl-and-icmpeq-zero.ll
@@ -75,7 +75,7 @@ define i1 @scalar_i32_signbit_shl_and_ne(i32 %x, i32 %y) {
 
 define <4 x i1> @vec_4xi32_signbit_shl_and_eq(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @vec_4xi32_signbit_shl_and_eq(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[Y:%.*]]
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> splat (i32 -2147483648), [[Y:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[SHL]], [[X:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
index 513fb69ab7463e..8015af59f4f819 100644
--- a/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
+++ b/llvm/test/Transforms/InstCombine/signed-truncation-check.ll
@@ -173,7 +173,7 @@ define i1 @positive_with_extra_and_logical(i32 %arg, i1 %z) {
 
 define <2 x i1> @positive_vec_splat(<2 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_splat(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], <i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <2 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <2 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <2 x i32> %arg, <i32 -1, i32 -1>
@@ -185,7 +185,7 @@ define <2 x i1> @positive_vec_splat(<2 x i32> %arg) {
 
 define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_nonsplat(
-; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[T1:%.*]] = icmp sgt <2 x i32> [[ARG:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[ARG]], <i32 128, i32 256>
 ; CHECK-NEXT:    [[T3:%.*]] = icmp ult <2 x i32> [[T2]], <i32 256, i32 512>
 ; CHECK-NEXT:    [[T4:%.*]] = and <2 x i1> [[T1]], [[T3]]
@@ -200,7 +200,7 @@ define <2 x i1> @positive_vec_nonsplat(<2 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison0(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 poison, i32 -1>
@@ -212,7 +212,7 @@ define <3 x i1> @positive_vec_poison0(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison1(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
@@ -224,7 +224,7 @@ define <3 x i1> @positive_vec_poison1(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison2(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
@@ -236,7 +236,7 @@ define <3 x i1> @positive_vec_poison2(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison3(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 poison, i32 -1>
@@ -248,7 +248,7 @@ define <3 x i1> @positive_vec_poison3(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison4(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 poison, i32 -1>
@@ -260,7 +260,7 @@ define <3 x i1> @positive_vec_poison4(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison5(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 -1, i32 -1>
@@ -272,7 +272,7 @@ define <3 x i1> @positive_vec_poison5(<3 x i32> %arg) {
 
 define <3 x i1> @positive_vec_poison6(<3 x i32> %arg) {
 ; CHECK-LABEL: @positive_vec_poison6(
-; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], <i32 128, i32 128, i32 128>
+; CHECK-NEXT:    [[T4_SIMPLIFIED:%.*]] = icmp ult <3 x i32> [[ARG:%.*]], splat (i32 128)
 ; CHECK-NEXT:    ret <3 x i1> [[T4_SIMPLIFIED]]
 ;
   %t1 = icmp sgt <3 x i32> %arg, <i32 -1, i32 poison, i32 -1>
diff --git a/llvm/test/Transforms/InstCombine/signext.ll b/llvm/test/Transforms/InstCombine/signext.ll
index 765a0470c2e483..f72cf29d2fcc7b 100644
--- a/llvm/test/Transforms/InstCombine/signext.ll
+++ b/llvm/test/Transforms/InstCombine/signext.ll
@@ -34,8 +34,8 @@ define i32 @sextinreg_extra_use(i32 %x) {
 
 define <2 x i32> @sextinreg_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @sextinreg_splat(
-; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], <i32 16, i32 16>
+; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t1 = and <2 x i32> %x, <i32 65535, i32 65535>
@@ -58,8 +58,8 @@ define i32 @sextinreg_alt(i32 %x) {
 
 define <2 x i32> @sextinreg_alt_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @sextinreg_alt_splat(
-; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], <i32 16, i32 16>
-; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], <i32 16, i32 16>
+; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 16)
+; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t1 = and <2 x i32> %x, <i32 65535, i32 65535>
@@ -119,8 +119,8 @@ define i32 @sextinreg2(i32 %x) {
 
 define <2 x i32> @sextinreg2_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @sextinreg2_splat(
-; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], <i32 24, i32 24>
-; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], <i32 24, i32 24>
+; CHECK-NEXT:    [[SEXT:%.*]] = shl <2 x i32> [[X:%.*]], splat (i32 24)
+; CHECK-NEXT:    [[T3:%.*]] = ashr exact <2 x i32> [[SEXT]], splat (i32 24)
 ; CHECK-NEXT:    ret <2 x i32> [[T3]]
 ;
   %t1 = and <2 x i32> %x, <i32 255, i32 255>
@@ -181,7 +181,7 @@ define i32 @ashr(i32 %x) {
 
 define <2 x i32> @ashr_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @ashr_splat(
-; CHECK-NEXT:    [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[SUB:%.*]] = ashr <2 x i32> [[X:%.*]], splat (i32 5)
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
   %shr = lshr <2 x i32> %x, <i32 5, i32 5>
diff --git a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll
index e7505721cad604..b82e33cd2da83e 100644
--- a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll
+++ b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll
@@ -121,7 +121,7 @@ define i32 @n7(i16 %x) {
 define <2 x i32> @t8(<2 x i16> %x) {
 ; CHECK-LABEL: @t8(
 ; CHECK-NEXT:    [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %i0 = zext <2 x i16> %x to <2 x i32>
@@ -133,7 +133,7 @@ define <2 x i32> @t8(<2 x i16> %x) {
 define <2 x i32> @t9(<2 x i16> %x) {
 ; CHECK-LABEL: @t9(
 ; CHECK-NEXT:    [[I1:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[I1]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[I1]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %i0 = zext <2 x i16> %x to <2 x i32>
@@ -146,7 +146,7 @@ define <2 x i32> @t9(<2 x i16> %x) {
 define <2 x i32> @t10_undef(<2 x i16> %x) {
 ; CHECK-LABEL: @t10_undef(
 ; CHECK-NEXT:    [[I0:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[I1:%.*]] = shl nuw <2 x i32> [[I0]], <i32 16, i32 16>
+; CHECK-NEXT:    [[I1:%.*]] = shl nuw <2 x i32> [[I0]], splat (i32 16)
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[I1]], <i32 -2147483648, i32 undef>
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
index 65faf0974b503f..cfefba5a6cb051 100644
--- a/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
+++ b/llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll
@@ -158,16 +158,16 @@ entry:
 define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() {
 ; CHECK-LABEL: define nofpclass(pinf) { <2 x float> } @ret_nofpclass_vector_elems_struct_ty_pinf__ninf() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret { <2 x float> } { <2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000> }
+; CHECK-NEXT:    ret { <2 x float> } { <2 x float> splat (float 0xFFF0000000000000) }
 ;
 entry:
   ret { <2 x float>} { <2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000> }
 }
 
 define nofpclass(pinf) [ 1 x [ 1 x float ]] @ret_nofpclass_nested_array_ty_pinf__ninf() {
-; CHECK-LABEL: @ret_nofpclass_nested_array_ty_pinf__ninf() {
+; CHECK-LABEL: define nofpclass(pinf) [1 x [1 x float]] @ret_nofpclass_nested_array_ty_pinf__ninf() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret {{.*}}float 0xFFF0000000000000
+; CHECK-NEXT:    ret [1 x [1 x float]] [[1 x float] [float 0xFFF0000000000000]]
 ;
 entry:
   ret [ 1 x [ 1 x float ]] [[ 1 x float ] [float 0xFFF0000000000000]]
diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll
index 51eff39cd900e2..9574c68025e95f 100644
--- a/llvm/test/Transforms/InstCombine/sitofp.ll
+++ b/llvm/test/Transforms/InstCombine/sitofp.ll
@@ -408,7 +408,7 @@ define <2 x i1> @i8_vec_sitofp_test3(<2 x i8> %A) {
 
 define <2 x i1> @i8_vec_sitofp_test4(<2 x i8> %A) {
 ; CHECK-LABEL: @i8_vec_sitofp_test4(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %B = sitofp <2 x i8> %A to <2 x double>
diff --git a/llvm/test/Transforms/InstCombine/smin-icmp.ll b/llvm/test/Transforms/InstCombine/smin-icmp.ll
index d1283d8afc0a74..1def7eaf10d573 100644
--- a/llvm/test/Transforms/InstCombine/smin-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/smin-icmp.ll
@@ -956,24 +956,24 @@ define void @eq_smin_v2i32(<2 x i32> %x, <2 x i32> %y) {
 ; icmp pred smin(C1, Y), C2 where C1 == C2
 define void @eq_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @eq_smin_v2i32_constant(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 10, i32 10>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 10))
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP1]])
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> zeroinitializer)
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP4]])
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
-; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP9]])
-; CHECK-NEXT:    [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP10]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1004,18 +1004,18 @@ define void @eq_smin_v2i32_constant(<2 x i32> %y) {
 ; icmp pred smin(C1, Y), C2 where C1 < C2
 define void @slt_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @slt_smin_v2i32_constant(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 5, i32 5>)
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 5))
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> zeroinitializer)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> zeroinitializer)
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> zeroinitializer)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> <i1 true, i1 true>)
@@ -1049,23 +1049,23 @@ define void @slt_smin_v2i32_constant(<2 x i32> %y) {
 define void @sle_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @sle_smin_v2i32_constant(
 ; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 5, i32 10>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP1]])
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> <i1 true, i1 true>)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> zeroinitializer)
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP4]])
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
-; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP9]])
-; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP10]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1096,26 +1096,26 @@ define void @sle_smin_v2i32_constant(<2 x i32> %y) {
 ; icmp pred smin(C1, Y), C2 where C1 > C2
 define void @sgt_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @sgt_smin_v2i32_constant(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 15, i32 15>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> splat (i32 15))
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP1]])
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP2]])
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP3]])
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP4]])
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
-; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP9]])
-; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP10]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1147,25 +1147,25 @@ define void @sgt_smin_v2i32_constant(<2 x i32> %y) {
 define void @sge_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @sge_smin_v2i32_constant(
 ; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 15, i32 10>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[Y]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP1]])
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP2]])
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP3]])
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[Y]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP4]])
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
-; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP9]])
-; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP10]])
 ; CHECK-NEXT:    ret void
 ;
@@ -1197,25 +1197,25 @@ define void @sge_smin_v2i32_constant(<2 x i32> %y) {
 define void @unknown_smin_v2i32_constant(<2 x i32> %y) {
 ; CHECK-LABEL: @unknown_smin_v2i32_constant(
 ; CHECK-NEXT:    [[COND:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> <i32 5, i32 15>)
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP1]])
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP2]])
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP3]])
-; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP4:%.*]] = icmp sgt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP4]])
-; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP5:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP5]])
-; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult <2 x i32> [[COND]], splat (i32 11)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP6]])
-; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP7:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP7]])
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], <i32 9, i32 9>
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt <2 x i32> [[COND]], splat (i32 9)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP8]])
-; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP9:%.*]] = icmp eq <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP9]])
-; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], <i32 10, i32 10>
+; CHECK-NEXT:    [[CMP10:%.*]] = icmp ne <2 x i32> [[COND]], splat (i32 10)
 ; CHECK-NEXT:    call void @use_v2i1(<2 x i1> [[CMP10]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/sqrt.ll b/llvm/test/Transforms/InstCombine/sqrt.ll
index f72fe5a6a5817b..0f4db3b3a65ae7 100644
--- a/llvm/test/Transforms/InstCombine/sqrt.ll
+++ b/llvm/test/Transforms/InstCombine/sqrt.ll
@@ -192,7 +192,7 @@ define double @sqrt_exp_intr_and_libcall_2(double %x) {
 
 define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
 ; CHECK-LABEL: @sqrt_exp_vec(
-; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 5.000000e-01, float 5.000000e-01>
+; CHECK-NEXT:    [[MERGED_SQRT:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 5.000000e-01)
 ; CHECK-NEXT:    [[E:%.*]] = call reassoc <2 x float> @llvm.exp.v2f32(<2 x float> [[MERGED_SQRT]])
 ; CHECK-NEXT:    ret <2 x float> [[E]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
index 70fd4eefe74f5d..3607b191664963 100644
--- a/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/ssub-with-overflow.ll
@@ -53,7 +53,7 @@ define { i8, i1 } @no_fold_on_constant_sub_overflow(i8 %x) {
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_constant(
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 -42, i32 -42>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 -42))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = sub nsw <2 x i32> %x, <i32 12, i32 12>
@@ -64,7 +64,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @no_fold_splat_undef_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X:%.*]], <i32 -12, i32 undef>
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 -30, i32 -30>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = sub nsw <2 x i32> %x, <i32 12, i32 undef>
@@ -75,7 +75,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @no_fold_splat_not_constant(
 ; CHECK-NEXT:    [[A:%.*]] = sub nsw <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 -30, i32 -30>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 -30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = sub nsw <2 x i32> %x, %y
diff --git a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
index e21ca605fc5af5..9774f6db555440 100644
--- a/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-ashr-or-to-icmp-select.ll
@@ -130,7 +130,7 @@ define i32 @neg_or_ashr_i32_commute(i32 %x0) {
 define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
+; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %sub = sub nsw <4 x i32> %y, %x
@@ -142,7 +142,7 @@ define <4 x i32> @sub_ashr_or_i32_vec(<4 x i32> %x, <4 x i32> %y) {
 define <4 x i32> @sub_ashr_or_i32_vec_nuw_nsw(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_nuw_nsw(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
+; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %sub = sub nuw nsw <4 x i32> %y, %x
@@ -166,7 +166,7 @@ define <4 x i32> @neg_or_ashr_i32_vec(<4 x i32> %x) {
 define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_commute(
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[X]]
+; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -1), <4 x i32> [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
   %sub = sub nsw <4 x i32> %y, %x
@@ -177,7 +177,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_commute(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @neg_or_ashr_i32_vec_commute(<4 x i32> %x0) {
 ; CHECK-LABEL: @neg_or_ashr_i32_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <4 x i32> <i32 42, i32 42, i32 42, i32 42>, [[X0:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer
 ; CHECK-NEXT:    [[SHR:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[SHR]]
@@ -283,7 +283,7 @@ define i32 @neg_or_extra_use_ashr_i32(i32 %x, ptr %p) {
 define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef1(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> <i32 255, i32 255, i32 undef, i32 255>, [[X:%.*]]
-; CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i32> [[SUB]], <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i32> [[SUB]], splat (i32 31)
 ; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
 ;
@@ -295,7 +295,7 @@ define <4 x i32> @sub_ashr_or_i32_vec_undef1(<4 x i32> %x) {
 
 define <4 x i32> @sub_ashr_or_i32_vec_undef2(<4 x i32> %x) {
 ; CHECK-LABEL: @sub_ashr_or_i32_vec_undef2(
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> <i32 255, i32 255, i32 255, i32 255>, [[X:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw <4 x i32> splat (i32 255), [[X:%.*]]
 ; CHECK-NEXT:    [[SHR:%.*]] = ashr <4 x i32> [[SUB]], <i32 undef, i32 31, i32 31, i32 31>
 ; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[SHR]], [[X]]
 ; CHECK-NEXT:    ret <4 x i32> [[OR]]
diff --git a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll
index 33c02d77c45b90..1010aab7cbb0b0 100644
--- a/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll
+++ b/llvm/test/Transforms/InstCombine/sub-lshr-or-to-icmp-select.ll
@@ -47,7 +47,7 @@ define <4 x i32> @neg_or_lshr_i32_vec(<4 x i32> %x) {
 
 define <4 x i32> @neg_or_lshr_i32_vec_commute(<4 x i32> %x0) {
 ; CHECK-LABEL: @neg_or_lshr_i32_vec_commute(
-; CHECK-NEXT:    [[X:%.*]] = sdiv <4 x i32> <i32 42, i32 42, i32 42, i32 42>, [[X0:%.*]]
+; CHECK-NEXT:    [[X:%.*]] = sdiv <4 x i32> splat (i32 42), [[X0:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[X]], zeroinitializer
 ; CHECK-NEXT:    [[SHR:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[SHR]]
diff --git a/llvm/test/Transforms/InstCombine/sub-not.ll b/llvm/test/Transforms/InstCombine/sub-not.ll
index 5053319162f0d2..9fa55a6896126a 100644
--- a/llvm/test/Transforms/InstCombine/sub-not.ll
+++ b/llvm/test/Transforms/InstCombine/sub-not.ll
@@ -29,7 +29,7 @@ define i8 @sub_not_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_not_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -64,7 +64,7 @@ define i8 @dec_sub_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @dec_sub_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -99,7 +99,7 @@ define i8 @sub_inc_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_inc_vec(
-; CHECK-NEXT:    [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[S_NEG:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[Y:%.*]], [[S_NEG]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -134,7 +134,7 @@ define i8 @sub_dec_extra_use(i8 %x, i8 %y) {
 
 define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @sub_dec_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
index b19eae4d8f9a41..5e3dd3daa41d99 100644
--- a/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
+++ b/llvm/test/Transforms/InstCombine/sub-of-negatible.ll
@@ -1458,8 +1458,8 @@ if.end:
 
 define <1 x i64> @PR56601(<1 x i64> %x, <1 x i64> %y) {
 ; CHECK-LABEL: @PR56601(
-; CHECK-NEXT:    [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], <i64 42>
-; CHECK-NEXT:    [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], <i64 12>
+; CHECK-NEXT:    [[M1:%.*]] = mul nsw <1 x i64> [[X:%.*]], splat (i64 42)
+; CHECK-NEXT:    [[M2:%.*]] = mul nsw <1 x i64> [[Y:%.*]], splat (i64 12)
 ; CHECK-NEXT:    [[A1:%.*]] = add <1 x i64> [[M1]], <i64 add (i64 ptrtoint (ptr @g to i64), i64 -4)>
 ; CHECK-NEXT:    [[A2:%.*]] = add <1 x i64> [[M2]], <i64 add (i64 ptrtoint (ptr @g to i64), i64 -3)>
 ; CHECK-NEXT:    [[R:%.*]] = sub <1 x i64> [[A1]], [[A2]]
diff --git a/llvm/test/Transforms/InstCombine/sub-xor.ll b/llvm/test/Transforms/InstCombine/sub-xor.ll
index b4add9698b1609..6b8ccbd08a5f61 100644
--- a/llvm/test/Transforms/InstCombine/sub-xor.ll
+++ b/llvm/test/Transforms/InstCombine/sub-xor.ll
@@ -16,8 +16,8 @@ define i32 @low_mask_nsw_nuw(i32 %x) {
 
 define <2 x i32> @low_mask_nsw_nuw_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @low_mask_nsw_nuw_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 31, i32 31>
-; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i32> [[AND]], <i32 63, i32 63>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 31)
+; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i32> [[AND]], splat (i32 63)
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
   %and = and <2 x i32> %x, <i32 31, i32 31>
@@ -135,8 +135,8 @@ define i32 @xor_add_extra_use(i32 %x) {
 
 define <2 x i8> @xor_add_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @xor_add_splat(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 24, i8 24>
-; CHECK-NEXT:    [[ADD:%.*]] = sub nuw nsw <2 x i8> <i8 105, i8 105>, [[AND]]
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24)
+; CHECK-NEXT:    [[ADD:%.*]] = sub nuw nsw <2 x i8> splat (i8 105), [[AND]]
 ; CHECK-NEXT:    ret <2 x i8> [[ADD]]
 ;
   %and = and <2 x i8> %x, <i8 24, i8 24>
@@ -147,9 +147,9 @@ define <2 x i8> @xor_add_splat(<2 x i8> %x) {
 
 define <2 x i8> @xor_add_splat_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @xor_add_splat_undef(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 24, i8 24>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 24)
 ; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i8> [[AND]], <i8 63, i8 undef>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[XOR]], <i8 42, i8 42>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[XOR]], splat (i8 42)
 ; CHECK-NEXT:    ret <2 x i8> [[ADD]]
 ;
   %and = and <2 x i8> %x, <i8 24, i8 24>
diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll
index ba596e10e8b3fe..6392b0c0c0714d 100644
--- a/llvm/test/Transforms/InstCombine/sub.ll
+++ b/llvm/test/Transforms/InstCombine/sub.ll
@@ -320,7 +320,7 @@ define i32 @test13(i32 %A) {
 
 define <2 x i32> @test12vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test12vec(
-; CHECK-NEXT:    [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[B_NEG:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[B_NEG]]
 ;
   %B = ashr <2 x i32> %A, <i32 31, i32 31>
@@ -330,7 +330,7 @@ define <2 x i32> @test12vec(<2 x i32> %A) {
 
 define <2 x i32> @test13vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test13vec(
-; CHECK-NEXT:    [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[B_NEG:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[B_NEG]]
 ;
   %B = lshr <2 x i32> %A, <i32 31, i32 31>
@@ -619,7 +619,7 @@ define <2 x i32> @test27vec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @test27vecsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test27vecsplat(
-; CHECK-NEXT:    [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
@@ -663,7 +663,7 @@ define <2 x i32> @test27commutedvec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @test27commutedvecsplat(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test27commutedvecsplat(
-; CHECK-NEXT:    [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[MUL_NEG:%.*]] = shl <2 x i32> [[Y:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[MUL_NEG]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
@@ -749,7 +749,7 @@ define <2 x i64> @test36(<2 x i64> %A) {
 
 define <2 x i32> @test37(<2 x i32> %A) {
 ; CHECK-LABEL: @test37(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[A:%.*]], splat (i32 -2147483648)
 ; CHECK-NEXT:    [[DIV_NEG:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[DIV_NEG]]
 ;
@@ -832,7 +832,7 @@ define i32 @test44(i32 %x) {
 
 define <2 x i32> @test44vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test44vec(
-; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 -32768, i32 -32768>
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw <2 x i32> [[X:%.*]], splat (i32 -32768)
 ; CHECK-NEXT:    ret <2 x i32> [[SUB]]
 ;
   %sub = sub nsw <2 x i32> %x, <i32 32768, i32 32768>
@@ -850,7 +850,7 @@ define <vscale x 2 x i32> @test44scalablevec(<vscale x 2 x i32> %x) {
 
 define <2 x i16> @test44vecminval(<2 x i16> %x) {
 ; CHECK-LABEL: @test44vecminval(
-; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], <i16 -32768, i16 -32768>
+; CHECK-NEXT:    [[SUB:%.*]] = xor <2 x i16> [[X:%.*]], splat (i16 -32768)
 ; CHECK-NEXT:    ret <2 x i16> [[SUB]]
 ;
   %sub = sub nsw <2 x i16> %x, <i16 -32768, i16 -32768>
@@ -1001,7 +1001,7 @@ define i32 @test54(i1 %C) {
 
 define <2 x i32> @test54vec(i1 %C) {
 ; CHECK-LABEL: @test54vec(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 -877, i32 -877>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 -877), <2 x i32> splat (i32 113)
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -1048,7 +1048,7 @@ define <2 x i32> @test55vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[A_NEG:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    [[A_NEG:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[A_NEG]]
 ;
 entry:
@@ -1228,8 +1228,8 @@ define i32 @test62(i32 %A) {
 
 define <2 x i32> @test62vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test62vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[C:%.*]] = sub <2 x i32> <i32 2, i32 2>, [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[C:%.*]] = sub <2 x i32> splat (i32 2), [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %B = sub <2 x i32> <i32 1, i32 1>, %A
@@ -1250,7 +1250,7 @@ define i32 @test63(i32 %A) {
 
 define <2 x i32> @test63vec(<2 x i32> %A) {
 ; CHECK-LABEL: @test63vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %B = sub <2 x i32> <i32 1, i32 1>, %A
@@ -1314,8 +1314,8 @@ define i32 @test67(i32 %x) {
 ; Check splat vectors too
 define <2 x i32> @test68(<2 x i32> %x) {
 ; CHECK-LABEL: @test68(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 255>)
-; CHECK-NEXT:    [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 255))
+; CHECK-NEXT:    [[DOTNEG:%.*]] = add nsw <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[DOTNEG]]
 ;
   %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -1329,7 +1329,7 @@ define <2 x i32> @test68(<2 x i32> %x) {
 define <2 x i32> @test69(<2 x i32> %x) {
 ; CHECK-LABEL: @test69(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 127>)
-; CHECK-NEXT:    [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[DOTNEG:%.*]] = add <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[DOTNEG]]
 ;
   %1 = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -1367,7 +1367,7 @@ define i32 @test71(i32 %A, i32 %B) {
 ; Check (X | Y) - Y --> X & ~Y where X and Y are vectors
 define <2 x i32> @test72(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test72(
-; CHECK-NEXT:    [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[B_NOT:%.*]] = xor <2 x i32> [[B:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[D:%.*]] = and <2 x i32> [[A:%.*]], [[B_NOT]]
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
@@ -1552,10 +1552,10 @@ define i8 @sub_not_mask_lowbits(i8 %x) {
 
 define <2 x i8> @sub_mask_lowbits_splat_extra_use(<2 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @sub_mask_lowbits_splat_extra_use(
-; CHECK-NEXT:    [[A2:%.*]] = and <2 x i8> [[X:%.*]], <i8 10, i8 10>
+; CHECK-NEXT:    [[A2:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 10)
 ; CHECK-NEXT:    store <2 x i8> [[A2]], ptr [[P:%.*]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], <i8 -11, i8 -11>
-; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], <i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X]], splat (i8 -11)
+; CHECK-NEXT:    [[R:%.*]] = add <2 x i8> [[TMP1]], splat (i8 -64)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a1 = add <2 x i8> %x, <i8 192, i8 192> ; 0xc0
@@ -2056,7 +2056,7 @@ define i8 @mul_sub_common_factor_commute1(i8 %x, i8 %y) {
 
 define <2 x i8> @mul_sub_common_factor_commute2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @mul_sub_common_factor_commute2(
-; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[M1:%.*]] = add <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[A:%.*]] = mul <2 x i8> [[M1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[A]]
 ;
@@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
 
 define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
 ; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
-; CHECK-NEXT:    [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], <i8 4, i8 4>
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 24, i8 24>, [[X0000]]
-; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i8> [[SUB]], <i8 3, i8 3>
+; CHECK-NEXT:    [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4)
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]]
+; CHECK-NEXT:    [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %x0000 = shl <2 x i8> %x, <i8 4, i8 4>     ; 4 low bits are known zero
@@ -2373,8 +2373,8 @@ define i10 @sub_to_and_negative4(i10 %x) {
 
 define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) {
 ; CHECK-LABEL: @sub_to_and_vector1(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], <i8 120, i8 120>
-; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[TMP1]], <i8 -9, i8 -9>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 120)
+; CHECK-NEXT:    [[R:%.*]] = add nsw <2 x i8> [[TMP1]], splat (i8 -9)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %sub = sub nuw <2 x i8> <i8 71, i8 71>, %x
@@ -2387,8 +2387,8 @@ define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) {
 define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) {
 ; CHECK-LABEL: @sub_to_and_vector2(
 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 poison>, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 120>
-; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> <i8 77, i8 77>, [[AND]]
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120)
+; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> splat (i8 77), [[AND]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %sub = sub nuw <2 x i8> <i8 71, i8 poison>, %x
@@ -2400,9 +2400,9 @@ define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) {
 
 define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) {
 ; CHECK-LABEL: @sub_to_and_vector3(
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 71>, [[X:%.*]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]]
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 poison>
-; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> <i8 44, i8 44>, [[AND]]
+; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> splat (i8 44), [[AND]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %sub = sub nuw <2 x i8> <i8 71, i8 71>, %x
@@ -2414,8 +2414,8 @@ define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) {
 
 define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) {
 ; CHECK-LABEL: @sub_to_and_vector4(
-; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 71>, [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 120>
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 71), [[X:%.*]]
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[SUB]], splat (i8 120)
 ; CHECK-NEXT:    [[R:%.*]] = sub nsw <2 x i8> <i8 88, i8 poison>, [[AND]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
index 4593730b8809f4..6a9b31f3c040e6 100644
--- a/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-binop-ext.ll
@@ -401,7 +401,7 @@ define <8 x i8> @narrow_zext_ashr_keep_trunc_vector(<8 x i8> %i1, <8 x i8> %i2)
 ; CHECK-NEXT:    [[I1_EXT:%.*]] = sext <8 x i8> [[I1]] to <8 x i32>
 ; CHECK-NEXT:    [[I2_EXT:%.*]] = sext <8 x i8> [[I2]] to <8 x i32>
 ; CHECK-NEXT:    [[SUB:%.*]] = add nsw <8 x i32> [[I1_EXT]], [[I2_EXT]]
-; CHECK-NEXT:    [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[SHIFT:%.*]] = lshr <8 x i32> [[SUB]], splat (i32 1)
 ; CHECK-NEXT:    [[T:%.*]] = trunc <8 x i32> [[SHIFT]] to <8 x i8>
 ; CHECK-NEXT:    ret <8 x i8> [[T]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/trunc-demand.ll b/llvm/test/Transforms/InstCombine/trunc-demand.ll
index 9d7bf589268e2b..1b3eb95b6a9ca6 100644
--- a/llvm/test/Transforms/InstCombine/trunc-demand.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-demand.ll
@@ -113,7 +113,7 @@ define <2 x i7> @trunc_lshr_vec_splat_exact_mask(<2 x i16> %x) {
 
 define <2 x i7> @trunc_lshr_big_shift(<2 x i16> %x) {
 ; CHECK-LABEL: @trunc_lshr_big_shift(
-; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i16> [[X:%.*]], <i16 7, i16 7>
+; CHECK-NEXT:    [[S:%.*]] = lshr <2 x i16> [[X:%.*]], splat (i16 7)
 ; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[S]] to <2 x i7>
 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i7> [[T]], <i7 1, i7 1>
 ; CHECK-NEXT:    ret <2 x i7> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll
index 063006ba5eea8b..33fa2c375f1eca 100644
--- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll
@@ -24,7 +24,7 @@ define i64 @test1(i64 %a) {
 define <2 x i64> @test1_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test1_vec(
 ; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 15>
+; CHECK-NEXT:    [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15)
 ; CHECK-NEXT:    call void @use_vec(<2 x i32> [[B]])
 ; CHECK-NEXT:    ret <2 x i64> [[D]]
 ;
@@ -82,8 +82,8 @@ define i64 @test2(i64 %a) {
 define <2 x i64> @test2_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test2_vec(
 ; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 36>
-; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 36>
+; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36)
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36)
 ; CHECK-NEXT:    call void @use_vec(<2 x i32> [[B]])
 ; CHECK-NEXT:    ret <2 x i64> [[D]]
 ;
@@ -229,8 +229,8 @@ define i32 @trunc_ashr(i32 %X) {
 
 define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @trunc_ashr_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 8, i32 8>
-; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[TMP1]], <i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8)
+; CHECK-NEXT:    [[C:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608)
 ; CHECK-NEXT:    ret <2 x i32> [[C]]
 ;
   %A = zext <2 x i32> %X to <2 x i36>
@@ -272,7 +272,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test8_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[E:%.*]] = shl nuw <2 x i64> [[D]], <i64 32, i64 32>
+; CHECK-NEXT:    [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32)
 ; CHECK-NEXT:    [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
@@ -358,7 +358,7 @@ define i64 @test11(i32 %A, i32 %B) {
 define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test11_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -422,7 +422,7 @@ define i64 @test12(i32 %A, i32 %B) {
 define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test12_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -486,7 +486,7 @@ define i64 @test13(i32 %A, i32 %B) {
 define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test13_vec(
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -704,7 +704,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) {
 define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64(
 ; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 16, i64 16>
@@ -754,7 +754,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) {
 define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform(
 ; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 31, i64 31>
@@ -862,7 +862,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
 define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15(
 ; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
@@ -891,7 +891,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) {
 define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4(
 ; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4)
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll
index c50a3d06d24b9c..69ccca562e885c 100644
--- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll
@@ -34,7 +34,7 @@ define <2 x i8> @trunc_shl_trunc(<2 x i64> %a) {
 
 define <2 x i8> @trunc_lshr_trunc_uniform(<2 x i64> %a) {
 ; CHECK-LABEL: @trunc_lshr_trunc_uniform(
-; CHECK-NEXT:    [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 8, i64 8>
+; CHECK-NEXT:    [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8)
 ; CHECK-NEXT:    [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
@@ -120,7 +120,7 @@ define i8 @trunc_ashr_trunc_exact(i64 %a) {
 
 define <2 x i8> @trunc_ashr_trunc_uniform(<2 x i64> %a) {
 ; CHECK-LABEL: @trunc_ashr_trunc_uniform(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 8, i64 8>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 8)
 ; CHECK-NEXT:    [[D:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[D]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll
index 0956fc655ec9a7..15533346b32ece 100644
--- a/llvm/test/Transforms/InstCombine/trunc.ll
+++ b/llvm/test/Transforms/InstCombine/trunc.ll
@@ -25,7 +25,7 @@ define i64 @test1(i64 %a) {
 define <2 x i64> @test1_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test1_vec(
 ; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 15>
+; CHECK-NEXT:    [[D:%.*]] = and <2 x i64> [[A]], splat (i64 15)
 ; CHECK-NEXT:    call void @use_vec(<2 x i32> [[B]])
 ; CHECK-NEXT:    ret <2 x i64> [[D]]
 ;
@@ -83,8 +83,8 @@ define i64 @test2(i64 %a) {
 define <2 x i64> @test2_vec(<2 x i64> %a) {
 ; CHECK-LABEL: @test2_vec(
 ; CHECK-NEXT:    [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 36>
-; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 36>
+; CHECK-NEXT:    [[D1:%.*]] = shl <2 x i64> [[A]], splat (i64 36)
+; CHECK-NEXT:    [[D:%.*]] = ashr exact <2 x i64> [[D1]], splat (i64 36)
 ; CHECK-NEXT:    call void @use_vec(<2 x i32> [[B]])
 ; CHECK-NEXT:    ret <2 x i64> [[D]]
 ;
@@ -230,8 +230,8 @@ define i32 @trunc_ashr(i32 %X) {
 
 define <2 x i32> @trunc_ashr_vec(<2 x i32> %X) {
 ; CHECK-LABEL: @trunc_ashr_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], <i32 8, i32 8>
-; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], <i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X:%.*]], splat (i32 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], splat (i32 -8388608)
 ; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %A = zext <2 x i32> %X to <2 x i36>
@@ -273,7 +273,7 @@ define <2 x i64> @test8_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test8_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[E:%.*]] = shl nuw <2 x i64> [[D]], <i64 32, i64 32>
+; CHECK-NEXT:    [[E:%.*]] = shl nuw <2 x i64> [[D]], splat (i64 32)
 ; CHECK-NEXT:    [[F:%.*]] = or disjoint <2 x i64> [[E]], [[C]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
 ;
@@ -359,7 +359,7 @@ define i64 @test11(i32 %A, i32 %B) {
 define <2 x i64> @test11_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test11_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -423,7 +423,7 @@ define i64 @test12(i32 %A, i32 %B) {
 define <2 x i64> @test12_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test12_vec(
 ; CHECK-NEXT:    [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = lshr <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -487,7 +487,7 @@ define i64 @test13(i32 %A, i32 %B) {
 define <2 x i64> @test13_vec(<2 x i32> %A, <2 x i32> %B) {
 ; CHECK-LABEL: @test13_vec(
 ; CHECK-NEXT:    [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 31)
 ; CHECK-NEXT:    [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64>
 ; CHECK-NEXT:    [[F:%.*]] = ashr <2 x i64> [[C]], [[E]]
 ; CHECK-NEXT:    ret <2 x i64> [[F]]
@@ -705,7 +705,7 @@ define i32 @trunc_shl_32_i32_i64(i64 %val) {
 define <2 x i32> @trunc_shl_16_v2i32_v2i64(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_16_v2i32_v2i64(
 ; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 16)
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 16, i64 16>
@@ -755,7 +755,7 @@ define i32 @trunc_shl_lshr_infloop(i64 %arg) {
 define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) {
 ; CHECK-LABEL: @trunc_shl_v2i32_v2i64_uniform(
 ; CHECK-NEXT:    [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 31>
+; CHECK-NEXT:    [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[TRUNC]]
 ;
   %shl = shl <2 x i64> %val, <i64 31, i64 31>
@@ -863,7 +863,7 @@ define i32 @trunc_shl_shl_var(i64 %arg, i64 %val) {
 define <8 x i16> @trunc_shl_v8i15_v8i32_15(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i15_v8i32_15(
 ; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 15)
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
@@ -892,7 +892,7 @@ define <8 x i16> @trunc_shl_v8i16_v8i32_17(<8 x i32> %a) {
 define <8 x i16> @trunc_shl_v8i16_v8i32_4(<8 x i32> %a) {
 ; CHECK-LABEL: @trunc_shl_v8i16_v8i32_4(
 ; CHECK-NEXT:    [[A_TR:%.*]] = trunc <8 x i32> [[A:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
+; CHECK-NEXT:    [[CONV:%.*]] = shl <8 x i16> [[A_TR]], splat (i16 4)
 ; CHECK-NEXT:    ret <8 x i16> [[CONV]]
 ;
   %shl = shl <8 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
diff --git a/llvm/test/Transforms/InstCombine/truncating-saturate.ll b/llvm/test/Transforms/InstCombine/truncating-saturate.ll
index c0111528e2a4d8..44bba283c66862 100644
--- a/llvm/test/Transforms/InstCombine/truncating-saturate.ll
+++ b/llvm/test/Transforms/InstCombine/truncating-saturate.ll
@@ -62,8 +62,8 @@ define i16 @testi32i16i8(i32 %add) {
 
 define <4 x i16> @testv4i32i16i8(<4 x i32> %add) {
 ; CHECK-LABEL: @testv4i32i16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> <i32 -128, i32 -128, i32 -128, i32 -128>)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 127, i32 127, i32 127, i32 127>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[ADD:%.*]], <4 x i32> splat (i32 -128))
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> splat (i32 127))
 ; CHECK-NEXT:    [[R:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[R]]
 ;
@@ -147,8 +147,8 @@ define i32 @testi64i32addsat(i32 %a, i32 %b) {
 
 define <4 x i8> @testv4i16i8(<4 x i16> %add) {
 ; CHECK-LABEL: @testv4i16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> <i16 -128, i16 -128, i16 -128, i16 -128>)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 127, i16 127, i16 127, i16 127>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[ADD:%.*]], <4 x i16> splat (i16 -128))
+; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP1]], <4 x i16> splat (i16 127))
 ; CHECK-NEXT:    [[COND_I:%.*]] = trunc nsw <4 x i16> [[TMP2]] to <4 x i8>
 ; CHECK-NEXT:    ret <4 x i8> [[COND_I]]
 ;
@@ -567,7 +567,7 @@ define i8 @C0zero(i8 %X, i8 %y, i8 %z) {
 
 define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @C0zeroV(
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], <i8 -10, i8 -10>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i8> [[X:%.*]], splat (i8 -10)
 ; CHECK-NEXT:    [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[F]]
 ;
@@ -581,9 +581,9 @@ define <2 x i8> @C0zeroV(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) {
 
 define <2 x i8> @C0zeroVu(<2 x i8> %X, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @C0zeroVu(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 10, i8 10>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i8> [[X:%.*]], splat (i8 10)
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult <2 x i8> [[A]], <i8 0, i8 10>
-; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i8> [[X]], <i8 -10, i8 -10>
+; CHECK-NEXT:    [[C:%.*]] = icmp slt <2 x i8> [[X]], splat (i8 -10)
 ; CHECK-NEXT:    [[F:%.*]] = select <2 x i1> [[C]], <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP]], <2 x i8> [[X]], <2 x i8> [[F]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
index fd5d38bb38ddc7..eb021a0fd2c894 100644
--- a/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
+++ b/llvm/test/Transforms/InstCombine/uadd-with-overflow.ll
@@ -42,7 +42,7 @@ define { i8, i1 } @no_fold_on_constant_add_overflow(i8 %x) {
 define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) {
 ; CHECK-LABEL: @no_fold_vector_no_overflow(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 -57, i8 -56>
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> <i8 55, i8 55>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55))
 ; CHECK-NEXT:    ret { <2 x i8>, <2 x i1> } [[B]]
 ;
   %a = add nuw <2 x i8> %x, <i8 199, i8 200>
@@ -53,7 +53,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_no_overflow(<2 x i8> %x) {
 define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) {
 ; CHECK-LABEL: @no_fold_vector_overflow(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw <2 x i8> [[X:%.*]], <i8 -56, i8 -55>
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> <i8 55, i8 55>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i8>, <2 x i1> } @llvm.uadd.with.overflow.v2i8(<2 x i8> [[A]], <2 x i8> splat (i8 55))
 ; CHECK-NEXT:    ret { <2 x i8>, <2 x i1> } [[B]]
 ;
   %a = add nuw <2 x i8> %x, <i8 200, i8 201>
@@ -63,7 +63,7 @@ define { <2 x i8>, <2 x i1> } @no_fold_vector_overflow(<2 x i8> %x) {
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_constant(
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nuw <2 x i32> %x, <i32 12, i32 12>
@@ -74,7 +74,7 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @no_fold_splat_undef_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], <i32 12, i32 undef>
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nuw <2 x i32> %x, <i32 12, i32 undef>
@@ -85,7 +85,7 @@ define { <2 x i32>, <2 x i1> } @no_fold_splat_undef_constant(<2 x i32> %x) {
 define { <2 x i32>, <2 x i1> } @no_fold_splat_not_constant(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @no_fold_splat_not_constant(
 ; CHECK-NEXT:    [[A:%.*]] = add nuw <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = add nuw <2 x i32> %x, %y
@@ -128,7 +128,7 @@ define { i32, i1 } @no_fold_wrapped_add(i32 %x) {
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_with_disjoint_or_constant(
-; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 42, i32 42>)
+; CHECK-NEXT:    [[B:%.*]] = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[X:%.*]], <2 x i32> splat (i32 42))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = or disjoint <2 x i32> %x, <i32 12, i32 12>
@@ -139,8 +139,8 @@ define { <2 x i32>, <2 x i1> } @fold_simple_splat_with_disjoint_or_constant(<2 x
 
 define { <2 x i32>, <2 x i1> } @fold_simple_splat_constant_with_or_fail(<2 x i32> %x) {
 ; CHECK-LABEL: @fold_simple_splat_constant_with_or_fail(
-; CHECK-NEXT:    [[A:%.*]] = or <2 x i32> [[X:%.*]], <i32 12, i32 12>
-; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> <i32 30, i32 30>)
+; CHECK-NEXT:    [[A:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 12)
+; CHECK-NEXT:    [[B:%.*]] = tail call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> [[A]], <2 x i32> splat (i32 30))
 ; CHECK-NEXT:    ret { <2 x i32>, <2 x i1> } [[B]]
 ;
   %a = or <2 x i32> %x, <i32 12, i32 12>
diff --git a/llvm/test/Transforms/InstCombine/uaddo.ll b/llvm/test/Transforms/InstCombine/uaddo.ll
index 9b56dce8b45856..ae7a07ec8000c9 100644
--- a/llvm/test/Transforms/InstCombine/uaddo.ll
+++ b/llvm/test/Transforms/InstCombine/uaddo.ll
@@ -18,7 +18,7 @@ define i32 @uaddo_commute1(i32 %x, i32 %y, i32 %z) {
 
 define <2 x i32> @uaddo_commute2(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
 ; CHECK-LABEL: @uaddo_commute2(
-; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOTY:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[X:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = icmp ugt <2 x i32> [[X]], [[NOTY]]
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> [[Z:%.*]], <2 x i32> [[A]]
diff --git a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
index 824ed4f2b43947..546824680ea131 100644
--- a/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
+++ b/llvm/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
@@ -24,7 +24,7 @@ define i64 @test(i64 %X, i1 %Cond ) {
 
 define <2 x i32> @PR34856(<2 x i32> %t0, <2 x i32> %t1) {
 ; CHECK-LABEL: @PR34856(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], <i32 -8, i32 -8>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i32> [[T1:%.*]], splat (i32 -8)
 ; CHECK-NEXT:    [[DIV1:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[DIV1]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
index 0d87122660cfa1..c8228057eeb10a 100644
--- a/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/umin_cttz_ctlz.ll
@@ -108,7 +108,7 @@ define i1 @umin_cttz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_cttz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 64, i32 64>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 64)
 ; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
@@ -298,7 +298,7 @@ define i1 @umin_ctlz_i1_zero_defined(i1 %X) {
 define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(<2 x i32> %X) {
 ; CHECK-LABEL: define <2 x i32> @umin_ctlz_2xi32_splat_zero_undefined(
 ; CHECK-SAME: <2 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], <i32 33554432, i32 33554432>
+; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[X]], splat (i32 33554432)
 ; CHECK-NEXT:    [[RET:%.*]] = call range(i32 0, 7) <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP1]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll
index 17b32670ae9d7b..a25ee985759b8c 100644
--- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check-via-xor.ll
@@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) {
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    call void @use2x8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[T0]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
index 677ef47456c013..be0b0e0da562fb 100644
--- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
@@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) {
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ule <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll
index 457a0e594b6303..0bd34deb41d5fc 100644
--- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check-via-xor.ll
@@ -26,7 +26,7 @@ define i1 @t0_basic(i8 %x, i8 %y) {
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[T0:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    call void @use2x8(<2 x i8> [[T0]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[T0]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll
index 94966a1eba3289..b4865218dbc143 100644
--- a/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-add-overflow-check.ll
@@ -21,7 +21,7 @@ define i1 @t0_basic(i8 %x, i8 %y) {
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i8> [[X:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
index ab147584d2108f..1509a42872922a 100644
--- a/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned_saturated_sub.ll
@@ -40,7 +40,7 @@ define i32 @usub_sat_C1_C2_produce_0_too(i32 %a){
 ; vector tests
 define <2 x i16> @usub_sat_C1_C2_splat(<2 x i16> %a) {
 ; CHECK-LABEL: @usub_sat_C1_C2_splat(
-; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> <i16 50, i16 50>, <2 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[COND:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> splat (i16 50), <2 x i16> [[A:%.*]])
 ; CHECK-NEXT:    ret <2 x i16> [[COND]]
 ;
   %add = sub nuw <2 x i16> <i16 64, i16 64>, %a
diff --git a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll
index 262942aa1219b8..55bad9a57b8634 100644
--- a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll
+++ b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll
@@ -187,7 +187,7 @@ define i64 @t3_notrunc_redundant_sext(i64 %data, i64 %nbits) {
 
 define <2 x i32> @t4_vec(<2 x i64> %data, <2 x i32> %nbits) {
 ; CHECK-LABEL: @t4_vec(
-; CHECK-NEXT:    [[SKIP_HIGH:%.*]] = sub <2 x i32> <i32 64, i32 64>, [[NBITS:%.*]]
+; CHECK-NEXT:    [[SKIP_HIGH:%.*]] = sub <2 x i32> splat (i32 64), [[NBITS:%.*]]
 ; CHECK-NEXT:    [[SKIP_HIGH_WIDE:%.*]] = zext nneg <2 x i32> [[SKIP_HIGH]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> [[DATA:%.*]], [[SKIP_HIGH_WIDE]]
 ; CHECK-NEXT:    [[SIGNEXTENDED:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index f78a7055ff2f6c..644f13f8eee636 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -641,7 +641,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
 ;; indices.
 define ptr @PR41624(<2 x ptr> %a) {
 ; CHECK-LABEL: @PR41624(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index 1fd7903307cef4..a4b55e2b6cfec1 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -644,7 +644,7 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
 ;; indices.
 define ptr @PR41624(<2 x ptr> %a) {
 ; CHECK-LABEL: @PR41624(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
index c43def83f58aca..ecd5e49bd6b28e 100644
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract-inseltpoison.ll
@@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32  %limit, ptr %ptr) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[ELT]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
 ; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16)
 ; CHECK-NEXT:    br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
 ; CHECK:       ret:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
index 84657c9d43896d..ef093709a98f58 100644
--- a/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/llvm/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -97,7 +97,7 @@ define void @nocopy(i64 %val, i32  %limit, ptr %ptr) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[ELT]] to i64
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP6]]
 ; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP7]], align 4
-; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[INC]] = add <16 x i32> [[TMP4]], splat (i32 16)
 ; CHECK-NEXT:    br i1 [[END]], label [[LOOP]], label [[RET:%.*]]
 ; CHECK:       ret:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll
index 9f5f957f494452..925f491d298410 100644
--- a/llvm/test/Transforms/InstCombine/vec_sext.ll
+++ b/llvm/test/Transforms/InstCombine/vec_sext.ll
@@ -44,7 +44,7 @@ define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b)
 
 define <2 x i32> @is_negative_poison_elt(<2 x i32> %a) {
 ; CHECK-LABEL: @is_negative_poison_elt(
-; CHECK-NEXT:    [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31>
+; CHECK-NEXT:    [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], splat (i32 31)
 ; CHECK-NEXT:    ret <2 x i32> [[A_LOBIT]]
 ;
   %cmp = icmp slt <2 x i32> %a, <i32 0, i32 poison>
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
index 0f233fbb4729e6..80fe4b78fe8a9f 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@@ -264,7 +264,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) {
 ; CHECK-LABEL: @add_wider(
 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255)
 ; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT]]
 ;
@@ -281,7 +281,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) {
 ; CHECK-LABEL: @div_wider(
 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1
-; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255)
 ; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT]]
 ;
@@ -378,7 +378,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
 
 define <1 x i32> @test16a(i32 %ele) {
 ; CHECK-LABEL: @test16a(
-; CHECK-NEXT:    ret <1 x i32> <i32 2>
+; CHECK-NEXT:    ret <1 x i32> splat (i32 2)
 ;
   %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
   %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
@@ -388,7 +388,7 @@ define <1 x i32> @test16a(i32 %ele) {
 
 define <4 x i8> @test16b(i8 %ele) {
 ; CHECK-LABEL: @test16b(
-; CHECK-NEXT:    ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    ret <4 x i8> splat (i8 2)
 ;
   %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
   %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -702,7 +702,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
 
 define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
 ; CHECK-LABEL: @shuffle_17add2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -936,7 +936,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @urem_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @urem_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -958,7 +958,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @srem_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @srem_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -980,7 +980,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @udiv_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -1002,7 +1002,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @sdiv_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> zeroinitializer
@@ -1703,7 +1703,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
 define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @splat_assoc_fdiv(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
@@ -1718,7 +1718,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
 define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @splat_assoc_fadd(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    call void @use(<2 x float> [[A]])
 ; CHECK-NEXT:    [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
@@ -1735,7 +1735,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
 define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_and(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]]
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
 ;
@@ -1751,7 +1751,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_xor(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]]
-; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42)
 ; CHECK-NEXT:    ret <5 x i32> [[R]]
 ;
   %splatx = shufflevector <4 x i32> %x, <4 x i32> poison, <5 x i32> zeroinitializer
@@ -1765,7 +1765,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
 define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_add_mul(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 75a84e51279b80..f99874c4a01a0f 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -254,7 +254,7 @@ define <3 x i32> @add_wider(i32 %y, i32 %z) {
 ; CHECK-LABEL: @add_wider(
 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[I1]], splat (i32 255)
 ; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT]]
 ;
@@ -271,7 +271,7 @@ define <3 x i32> @div_wider(i32 %y, i32 %z) {
 ; CHECK-LABEL: @div_wider(
 ; CHECK-NEXT:    [[I0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
 ; CHECK-NEXT:    [[I1:%.*]] = insertelement <2 x i32> [[I0]], i32 [[Z:%.*]], i64 1
-; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], <i32 255, i32 255>
+; CHECK-NEXT:    [[A:%.*]] = sdiv <2 x i32> [[I1]], splat (i32 255)
 ; CHECK-NEXT:    [[EXT:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> poison, <3 x i32> <i32 0, i32 1, i32 poison>
 ; CHECK-NEXT:    ret <3 x i32> [[EXT]]
 ;
@@ -368,7 +368,7 @@ define <4 x float> @test15b(<4 x float> %LHS, <4 x float> %RHS) {
 
 define <1 x i32> @test16a(i32 %ele) {
 ; CHECK-LABEL: @test16a(
-; CHECK-NEXT:    ret <1 x i32> <i32 2>
+; CHECK-NEXT:    ret <1 x i32> splat (i32 2)
 ;
   %t0 = insertelement <2 x i32> <i32 1, i32 undef>, i32 %ele, i32 1
   %t1 = shl <2 x i32> %t0, <i32 1, i32 1>
@@ -378,7 +378,7 @@ define <1 x i32> @test16a(i32 %ele) {
 
 define <4 x i8> @test16b(i8 %ele) {
 ; CHECK-LABEL: @test16b(
-; CHECK-NEXT:    ret <4 x i8> <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    ret <4 x i8> splat (i8 2)
 ;
   %t0 = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef, i8 1>, i8 %ele, i32 6
   %t1 = shl <8 x i8> %t0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -696,7 +696,7 @@ define <4 x i16> @widening_shuffle_or(<2 x i16> %v) {
 
 define <4 x i32> @shuffle_17add2(<4 x i32> %v) {
 ; CHECK-LABEL: @shuffle_17add2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %t1 = shufflevector <4 x i32> %v, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -930,7 +930,7 @@ define <2 x i32> @lshr_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @urem_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @urem_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = urem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = urem <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -952,7 +952,7 @@ define <2 x i32> @urem_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @srem_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @srem_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = srem <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = srem <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -974,7 +974,7 @@ define <2 x i32> @srem_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @udiv_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @udiv_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = udiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = udiv <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -996,7 +996,7 @@ define <2 x i32> @udiv_splat_constant1(<2 x i32> %x) {
 define <2 x i32> @sdiv_splat_constant0(<2 x i32> %x) {
 ; CHECK-LABEL: @sdiv_splat_constant0(
 ; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = sdiv <2 x i32> <i32 42, i32 42>, [[SPLAT]]
+; CHECK-NEXT:    [[R:%.*]] = sdiv <2 x i32> splat (i32 42), [[SPLAT]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %splat = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -1708,7 +1708,7 @@ define <3 x i8> @splat_assoc_or(<3 x i8> %x, <3 x i8> %y, <3 x i8> %z) {
 define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @splat_assoc_fdiv(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fdiv reassoc nsz <2 x float> [[Y:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[R:%.*]] = fdiv reassoc nsz <2 x float> [[A]], [[SPLATX]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
@@ -1723,7 +1723,7 @@ define <2 x float> @splat_assoc_fdiv(<2 x float> %x, <2 x float> %y) {
 define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
 ; CHECK-LABEL: @splat_assoc_fadd(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x float> [[Y:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    call void @use(<2 x float> [[A]])
 ; CHECK-NEXT:    [[R:%.*]] = fadd fast <2 x float> [[A]], [[SPLATX]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
@@ -1740,7 +1740,7 @@ define <2 x float> @splat_assoc_fadd(<2 x float> %x, <2 x float> %y) {
 define <3 x i32> @splat_assoc_and(<4 x i32> %x, <3 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_and(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <3 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[A:%.*]] = and <3 x i32> [[Y:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[R:%.*]] = and <3 x i32> [[SPLATX]], [[A]]
 ; CHECK-NEXT:    ret <3 x i32> [[R]]
 ;
@@ -1756,7 +1756,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_xor(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <5 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <5 x i32> [[Y:%.*]], [[SPLATX]]
-; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[R:%.*]] = xor <5 x i32> [[TMP1]], splat (i32 42)
 ; CHECK-NEXT:    ret <5 x i32> [[R]]
 ;
   %splatx = shufflevector <4 x i32> %x, <4 x i32> undef, <5 x i32> zeroinitializer
@@ -1770,7 +1770,7 @@ define <5 x i32> @splat_assoc_xor(<4 x i32> %x, <5 x i32> %y) {
 define <4 x i32> @splat_assoc_add_mul(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @splat_assoc_add_mul(
 ; CHECK-NEXT:    [[SPLATX:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], <i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[Y:%.*]], splat (i32 42)
 ; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[SPLATX]], [[A]]
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll
index a87364600ba308..5770e2ce72020d 100644
--- a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll
@@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) {
 ; The ashr turns into an lshr.
 define <2 x i64> @test2(<2 x i64> %a) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
-; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1)
+; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767)
 ; CHECK-NEXT:    ret <2 x i64> [[T]]
 ;
   %b = and <2 x i64> %a, <i64 65535, i64 65535>
@@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
 define void @convert(ptr %dst.addr, <2 x i64> %src) {
 ; CHECK-LABEL: @convert(
 ; CHECK-NEXT:    [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1)
 ; CHECK-NEXT:    store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) {
 
 define <2 x i65> @foo(<2 x i64> %t) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65>
 ; CHECK-NEXT:    ret <2 x i65> [[B]]
 ;
@@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) {
 define <2 x i64> @bar(<2 x i65> %t) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i65> %t to <2 x i32>
@@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) {
 
 define <2 x i64> @quxs(<2 x i64> %t) {
 ; CHECK-LABEL: @quxs(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32)
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i64> %t to <2 x i32>
@@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) {
 
 define <2 x i64> @quxt(<2 x i64> %t) {
 ; CHECK-LABEL: @quxt(
-; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
+; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32)
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = shl <2 x i64> %t, <i64 32, i64 32>
@@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) {
 
 define <8 x i32> @pr24458(<8 x float> %n) {
 ; CHECK-LABEL: @pr24458(
-; CHECK-NEXT:    ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    ret <8 x i32> splat (i32 -1)
 ;
   %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
   %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll
index fd2a4ffdfb7092..56e957d5430789 100644
--- a/llvm/test/Transforms/InstCombine/vector-casts.ll
+++ b/llvm/test/Transforms/InstCombine/vector-casts.ll
@@ -52,8 +52,8 @@ define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) {
 ; The ashr turns into an lshr.
 define <2 x i64> @test2(<2 x i64> %a) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 1, i64 1>
-; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[B]], <i64 32767, i64 32767>
+; CHECK-NEXT:    [[B:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 1)
+; CHECK-NEXT:    [[T:%.*]] = and <2 x i64> [[B]], splat (i64 32767)
 ; CHECK-NEXT:    ret <2 x i64> [[T]]
 ;
   %b = and <2 x i64> %a, <i64 65535, i64 65535>
@@ -151,7 +151,7 @@ define <2 x i64> @test7(<4 x float> %a, <4 x float> %b) {
 define void @convert(ptr %dst.addr, <2 x i64> %src) {
 ; CHECK-LABEL: @convert(
 ; CHECK-NEXT:    [[VAL:%.*]] = trunc <2 x i64> [[SRC:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], <i32 1, i32 1>
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i32> [[VAL]], splat (i32 1)
 ; CHECK-NEXT:    store <2 x i32> [[ADD]], ptr [[DST_ADDR:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -163,7 +163,7 @@ define void @convert(ptr %dst.addr, <2 x i64> %src) {
 
 define <2 x i65> @foo(<2 x i64> %t) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], splat (i64 4294967295)
 ; CHECK-NEXT:    [[B:%.*]] = zext nneg <2 x i64> [[A_MASK]] to <2 x i65>
 ; CHECK-NEXT:    ret <2 x i65> [[B]]
 ;
@@ -175,7 +175,7 @@ define <2 x i65> @foo(<2 x i64> %t) {
 define <2 x i64> @bar(<2 x i65> %t) {
 ; CHECK-LABEL: @bar(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
+; CHECK-NEXT:    [[B:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i65> %t to <2 x i32>
@@ -196,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) {
 
 define <2 x i64> @quxs(<2 x i64> %t) {
 ; CHECK-LABEL: @quxs(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32)
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = trunc <2 x i64> %t to <2 x i32>
@@ -207,8 +207,8 @@ define <2 x i64> @quxs(<2 x i64> %t) {
 
 define <2 x i64> @quxt(<2 x i64> %t) {
 ; CHECK-LABEL: @quxt(
-; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], <i64 32, i64 32>
-; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], <i64 32, i64 32>
+; CHECK-NEXT:    [[A:%.*]] = shl <2 x i64> [[T:%.*]], splat (i64 32)
+; CHECK-NEXT:    [[B:%.*]] = ashr exact <2 x i64> [[A]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[B]]
 ;
   %a = shl <2 x i64> %t, <i64 32, i64 32>
@@ -278,7 +278,7 @@ define <4 x float> @f(i32 %a) {
 
 define <8 x i32> @pr24458(<8 x float> %n) {
 ; CHECK-LABEL: @pr24458(
-; CHECK-NEXT:    ret <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    ret <8 x i32> splat (i32 -1)
 ;
   %notequal_b_load_.i = fcmp une <8 x float> %n, zeroinitializer
   %equal_a_load72_.i = fcmp ueq <8 x float> %n, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/vector-mul.ll b/llvm/test/Transforms/InstCombine/vector-mul.ll
index 4640e19011b01e..2b66a8b12bfce7 100644
--- a/llvm/test/Transforms/InstCombine/vector-mul.ll
+++ b/llvm/test/Transforms/InstCombine/vector-mul.ll
@@ -27,7 +27,7 @@ entry:
 define <4 x i8> @AddToSelf_i8(<4 x i8> %InVec)  {
 ; CHECK-LABEL: @AddToSelf_i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 1)
 ; CHECK-NEXT:    ret <4 x i8> [[MUL]]
 ;
 entry:
@@ -38,7 +38,7 @@ entry:
 define <4 x i8> @SplatPow2Test1_i8(<4 x i8> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test1_i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 2)
 ; CHECK-NEXT:    ret <4 x i8> [[MUL]]
 ;
 entry:
@@ -49,7 +49,7 @@ entry:
 define <4 x i8> @SplatPow2Test2_i8(<4 x i8> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test2_i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i8> [[INVEC:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <4 x i8> [[MUL]]
 ;
 entry:
@@ -71,7 +71,7 @@ entry:
 define <4 x i8> @MulTest2_i8(<4 x i8> %InVec)  {
 ; CHECK-LABEL: @MulTest2_i8(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <4 x i8> [[MUL]]
 ;
 entry:
@@ -124,7 +124,7 @@ entry:
 define <4 x i16> @AddToSelf_i16(<4 x i16> %InVec)  {
 ; CHECK-LABEL: @AddToSelf_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 1)
 ; CHECK-NEXT:    ret <4 x i16> [[MUL]]
 ;
 entry:
@@ -135,7 +135,7 @@ entry:
 define <4 x i16> @SplatPow2Test1_i16(<4 x i16> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test1_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 2)
 ; CHECK-NEXT:    ret <4 x i16> [[MUL]]
 ;
 entry:
@@ -146,7 +146,7 @@ entry:
 define <4 x i16> @SplatPow2Test2_i16(<4 x i16> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test2_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], <i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i16> [[INVEC:%.*]], splat (i16 3)
 ; CHECK-NEXT:    ret <4 x i16> [[MUL]]
 ;
 entry:
@@ -168,7 +168,7 @@ entry:
 define <4 x i16> @MulTest2_i16(<4 x i16> %InVec)  {
 ; CHECK-LABEL: @MulTest2_i16(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], <i16 3, i16 3, i16 3, i16 3>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 3)
 ; CHECK-NEXT:    ret <4 x i16> [[MUL]]
 ;
 entry:
@@ -221,7 +221,7 @@ entry:
 define <4 x i32> @AddToSelf_i32(<4 x i32> %InVec)  {
 ; CHECK-LABEL: @AddToSelf_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[MUL]]
 ;
 entry:
@@ -232,7 +232,7 @@ entry:
 define <4 x i32> @SplatPow2Test1_i32(<4 x i32> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test1_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 2)
 ; CHECK-NEXT:    ret <4 x i32> [[MUL]]
 ;
 entry:
@@ -243,7 +243,7 @@ entry:
 define <4 x i32> @SplatPow2Test2_i32(<4 x i32> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test2_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i32> [[INVEC:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <4 x i32> [[MUL]]
 ;
 entry:
@@ -265,7 +265,7 @@ entry:
 define <4 x i32> @MulTest2_i32(<4 x i32> %InVec)  {
 ; CHECK-LABEL: @MulTest2_i32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 3)
 ; CHECK-NEXT:    ret <4 x i32> [[MUL]]
 ;
 entry:
@@ -318,7 +318,7 @@ entry:
 define <4 x i64> @AddToSelf_i64(<4 x i64> %InVec)  {
 ; CHECK-LABEL: @AddToSelf_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 1)
 ; CHECK-NEXT:    ret <4 x i64> [[MUL]]
 ;
 entry:
@@ -329,7 +329,7 @@ entry:
 define <4 x i64> @SplatPow2Test1_i64(<4 x i64> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test1_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 2)
 ; CHECK-NEXT:    ret <4 x i64> [[MUL]]
 ;
 entry:
@@ -340,7 +340,7 @@ entry:
 define <4 x i64> @SplatPow2Test2_i64(<4 x i64> %InVec)  {
 ; CHECK-LABEL: @SplatPow2Test2_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[MUL:%.*]] = shl <4 x i64> [[INVEC:%.*]], splat (i64 3)
 ; CHECK-NEXT:    ret <4 x i64> [[MUL]]
 ;
 entry:
@@ -362,7 +362,7 @@ entry:
 define <4 x i64> @MulTest2_i64(<4 x i64> %InVec)  {
 ; CHECK-LABEL: @MulTest2_i64(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 3)
 ; CHECK-NEXT:    ret <4 x i64> [[MUL]]
 ;
 entry:
@@ -399,7 +399,7 @@ entry:
 define <4 x i8> @ShiftMulTest1(<4 x i8> %InVec) {
 ; CHECK-LABEL: @ShiftMulTest1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], <i8 12, i8 12, i8 12, i8 12>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i8> [[INVEC:%.*]], splat (i8 12)
 ; CHECK-NEXT:    ret <4 x i8> [[MUL]]
 ;
 entry:
@@ -411,7 +411,7 @@ entry:
 define <4 x i16> @ShiftMulTest2(<4 x i16> %InVec) {
 ; CHECK-LABEL: @ShiftMulTest2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], <i16 12, i16 12, i16 12, i16 12>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i16> [[INVEC:%.*]], splat (i16 12)
 ; CHECK-NEXT:    ret <4 x i16> [[MUL]]
 ;
 entry:
@@ -423,7 +423,7 @@ entry:
 define <4 x i32> @ShiftMulTest3(<4 x i32> %InVec) {
 ; CHECK-LABEL: @ShiftMulTest3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], <i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i32> [[INVEC:%.*]], splat (i32 12)
 ; CHECK-NEXT:    ret <4 x i32> [[MUL]]
 ;
 entry:
@@ -435,7 +435,7 @@ entry:
 define <4 x i64> @ShiftMulTest4(<4 x i64> %InVec) {
 ; CHECK-LABEL: @ShiftMulTest4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT:    [[MUL:%.*]] = mul <4 x i64> [[INVEC:%.*]], splat (i64 12)
 ; CHECK-NEXT:    ret <4 x i64> [[MUL]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
index 65d00083532621..5e141f6aacaf2b 100644
--- a/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-min-max-known.ll
@@ -16,7 +16,7 @@ define i1 @vec_reduce_umax_non_zero(<4 x i8> %xx) {
 
 define i1 @vec_reduce_umax_non_zero_fail(<4 x i8> %xx) {
 ; CHECK-LABEL: @vec_reduce_umax_non_zero_fail(
-; CHECK-NEXT:    [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = add nsw <4 x i8> [[XX:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> [[X]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
diff --git a/llvm/test/Transforms/InstCombine/vector-trunc.ll b/llvm/test/Transforms/InstCombine/vector-trunc.ll
index bccb12e66eba19..85d6de4a3977c2 100644
--- a/llvm/test/Transforms/InstCombine/vector-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/vector-trunc.ll
@@ -3,9 +3,9 @@
 
 define <4 x i16> @trunc_add_nsw(<4 x i32> %0) {
 ; CHECK-LABEL: @trunc_add_nsw(
-; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], splat (i32 17)
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc nsw <4 x i32> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i16> [[TMP3]], splat (i16 1)
 ; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
 ;
   %2 = ashr <4 x i32> %0, <i32 17, i32 17, i32 17, i32 17>
@@ -16,9 +16,9 @@ define <4 x i16> @trunc_add_nsw(<4 x i32> %0) {
 
 define <4 x i16> @trunc_add_no_nsw(<4 x i32> %0) {
 ; CHECK-LABEL: @trunc_add_no_nsw(
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP0:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc nuw <4 x i32> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1)
 ; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
 ;
   %2 = ashr <4 x i32> %0, <i32 16, i32 16, i32 16, i32 16>
@@ -31,7 +31,7 @@ define <4 x i16> @trunc_add_mixed(<4 x i32> %0) {
 ; CHECK-LABEL: @trunc_add_mixed(
 ; CHECK-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP0:%.*]], <i32 17, i32 16, i32 17, i32 16>
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP3]], splat (i16 1)
 ; CHECK-NEXT:    ret <4 x i16> [[TMP4]]
 ;
   %2 = ashr <4 x i32> %0, <i32 17, i32 16, i32 17, i32 16>
diff --git a/llvm/test/Transforms/InstCombine/vector-udiv.ll b/llvm/test/Transforms/InstCombine/vector-udiv.ll
index c817b3a1ac5a0a..ef30498adcb1c6 100644
--- a/llvm/test/Transforms/InstCombine/vector-udiv.ll
+++ b/llvm/test/Transforms/InstCombine/vector-udiv.ll
@@ -3,7 +3,7 @@
 
 define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_splatconst_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[A0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = udiv <4 x i32> %a0, <i32 2, i32 2, i32 2, i32 2>
@@ -22,7 +22,7 @@ define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) {
 ; X udiv C, where C >= signbit
 define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_negconstsplat(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A0:%.*]], splat (i32 -4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -51,7 +51,7 @@ define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) {
 ; X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
 define <4 x i32> @test_v4i32_shl_splatconst_pow2(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: @test_v4i32_shl_splatconst_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A1:%.*]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP1]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -74,7 +74,7 @@ define <4 x i32> @test_v4i32_shl_const_pow2(<4 x i32> %a0, <4 x i32> %a1) {
 ; X udiv (zext (C1 << N)), where C1 is "1<<C2"  -->  X >> (N+C2)
 define <4 x i32> @test_v4i32_zext_shl_splatconst_pow2(<4 x i32> %a0, <4 x i16> %a1) {
 ; CHECK-LABEL: @test_v4i32_zext_shl_splatconst_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], <i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[A1:%.*]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext nneg <4 x i16> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[A0:%.*]], [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
diff --git a/llvm/test/Transforms/InstCombine/vector-urem.ll b/llvm/test/Transforms/InstCombine/vector-urem.ll
index 627789a03ef6ca..d087bbbe06abf6 100644
--- a/llvm/test/Transforms/InstCombine/vector-urem.ll
+++ b/llvm/test/Transforms/InstCombine/vector-urem.ll
@@ -3,7 +3,7 @@
 
 define <4 x i32> @test_v4i32_splatconst_pow2(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_splatconst_pow2(
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = urem <4 x i32> %a0, <i32 2, i32 2, i32 2, i32 2>
@@ -29,7 +29,7 @@ define <4 x i32> @test_v4i32_const_pow2_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_one(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_one(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -39,7 +39,7 @@ define <4 x i32> @test_v4i32_one(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_one_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -50,8 +50,8 @@ define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) {
 define <4 x i32> @test_v4i32_negconstsplat(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_negconstsplat(
 ; CHECK-NEXT:    [[A0_FR:%.*]] = freeze <4 x i32> [[A0:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], <i32 -3, i32 -3, i32 -3, i32 -3>
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i32> [[A0_FR]], splat (i32 -3)
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[A0_FR]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A0_FR]], <4 x i32> [[TMP2]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/vector-xor.ll b/llvm/test/Transforms/InstCombine/vector-xor.ll
index 13894ef85b5da8..3b37684df9e36a 100644
--- a/llvm/test/Transforms/InstCombine/vector-xor.ll
+++ b/llvm/test/Transforms/InstCombine/vector-xor.ll
@@ -33,7 +33,7 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
 
 define <4 x i32> @test_v4i32_xor_bswap_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_xor_bswap_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A0:%.*]], splat (i32 -16777216)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
@@ -68,7 +68,7 @@ define <4 x i32> @test_v4i32_xor_bswap_const_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @test_v4i32_demorgan_and(
-; CHECK-NEXT:    [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[X:%.*]], [[Y_NOT]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
@@ -82,7 +82,7 @@ define <4 x i32> @test_v4i32_demorgan_and(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @test_v4i32_demorgan_or(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @test_v4i32_demorgan_or(
-; CHECK-NEXT:    [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[Y_NOT:%.*]] = xor <4 x i32> [[Y:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[X:%.*]], [[Y_NOT]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
@@ -120,7 +120,7 @@ define <4 x i32> @test_v4i32_not_ashr_not_poison(<4 x i32> %x, <4 x i32> %y) {
 
 define <4 x i32> @test_v4i32_not_ashr_negative_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_not_ashr_negative_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> <i32 2, i32 2, i32 2, i32 2>, [[A0:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> splat (i32 2), [[A0:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = ashr <4 x i32> <i32 -3, i32 -3, i32 -3, i32 -3>, %a0
@@ -152,7 +152,7 @@ define <4 x i32> @test_v4i32_not_ashr_negative_const_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_not_lshr_nonnegative_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> <i32 -4, i32 -4, i32 -4, i32 -4>, [[A0:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> splat (i32 -4), [[A0:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = lshr <4 x i32> <i32  3, i32  3, i32  3, i32  3>, %a0
@@ -184,7 +184,7 @@ define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_not_sub_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_not_sub_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -4)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = sub <4 x i32> <i32  3, i32  3, i32  3, i32  3>, %a0
@@ -216,7 +216,7 @@ define <4 x i32> @test_v4i32_not_sub_const_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 -2147483645, i32 -2147483645, i32 -2147483645, i32 -2147483645>, [[A0:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> splat (i32 -2147483645), [[A0:%.*]]
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = sub <4 x i32> <i32  3, i32  3, i32  3, i32  3>, %a0
@@ -227,7 +227,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_splatconst(<4 x i32> %a0) {
 define <4 x i32> @test_v4i32_xor_signmask_sub_const(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const(
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> <i32 3, i32 5, i32 -1, i32 15>, [[A0:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %1 = sub <4 x i32> <i32  3, i32 5, i32 -1, i32 15>, %a0
@@ -250,7 +250,7 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_const_poison(<4 x i32> %a0) {
 
 define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_xor_signmask_add_splatconst(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -2147483645, i32 -2147483645, i32 -2147483645, i32 -2147483645>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], splat (i32 -2147483645)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = add <4 x i32> <i32  3, i32  3, i32  3, i32  3>, %a0
@@ -261,7 +261,7 @@ define <4 x i32> @test_v4i32_xor_signmask_add_splatconst(<4 x i32> %a0) {
 define <4 x i32> @test_v4i32_xor_signmask_add_const(<4 x i32> %a0) {
 ; CHECK-LABEL: @test_v4i32_xor_signmask_add_const(
 ; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 3, i32 5, i32 -1, i32 15>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -2147483648)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 ;
   %1 = add <4 x i32> <i32  3, i32 5, i32 -1, i32 15>, %a0
diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll
index 4e3cb34931d07e..f2eb5804ee25d0 100644
--- a/llvm/test/Transforms/InstCombine/with_overflow.ll
+++ b/llvm/test/Transforms/InstCombine/with_overflow.ll
@@ -616,7 +616,7 @@ declare { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8>, <4 x i8>)
 
 define { <4 x i8>, <4 x i1> } @always_sadd_const_vector() nounwind {
 ; CHECK-LABEL: @always_sadd_const_vector(
-; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> <i8 -128, i8 -128, i8 -128, i8 -128>, <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
+; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -128), <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
 ;
   %x = call { <4 x i8>, <4 x i1> } @llvm.sadd.with.overflow.v4i8(<4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
   ret { <4 x i8>, <4 x i1> } %x
@@ -632,7 +632,7 @@ define { <4 x i8>, <4 x i1> } @always_uadd_const_vector() nounwind {
 
 define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind {
 ; CHECK-LABEL: @always_ssub_const_vector(
-; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
+; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 127), <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
 ;
   %x = call { <4 x i8>, <4 x i1> } @llvm.ssub.with.overflow.v4i8(<4 x i8> <i8 -128, i8 -128, i8 -128, i8 -128>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
   ret { <4 x i8>, <4 x i1> } %x
@@ -640,7 +640,7 @@ define { <4 x i8>, <4 x i1> } @always_ssub_const_vector() nounwind {
 
 define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
 ; CHECK-LABEL: @always_usub_const_vector(
-; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
+; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -1), <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
 ;
   %x = call { <4 x i8>, <4 x i1> } @llvm.usub.with.overflow.v4i8(<4 x i8> <i8 0, i8 0, i8 0, i8 0>, <4 x i8> <i8 1, i8 1, i8 1, i8 1>)
   ret { <4 x i8>, <4 x i1> } %x
@@ -649,7 +649,7 @@ define { <4 x i8>, <4 x i1> } @always_usub_const_vector() nounwind {
 ; NOTE: LLVM doesn't (yet) detect the multiplication always results in a overflow
 define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind {
 ; CHECK-LABEL: @always_smul_const_vector(
-; CHECK-NEXT:    [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i8> <i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:    [[X:%.*]] = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> splat (i8 127), <4 x i8> splat (i8 3))
 ; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } [[X]]
 ;
   %x = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> <i8 127, i8 127, i8 127, i8 127>, <4 x i8> <i8 3, i8 3, i8 3, i8 3>)
@@ -658,7 +658,7 @@ define { <4 x i8>, <4 x i1> } @always_smul_const_vector() nounwind {
 
 define { <4 x i8>, <4 x i1> } @always_umul_const_vector() nounwind {
 ; CHECK-LABEL: @always_umul_const_vector(
-; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> <i8 -3, i8 -3, i8 -3, i8 -3>, <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
+; CHECK-NEXT:    ret { <4 x i8>, <4 x i1> } { <4 x i8> splat (i8 -3), <4 x i1> <i1 true, i1 true, i1 true, i1 true> }
 ;
   %x = call { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8> <i8 255, i8 255, i8 255, i8 255>, <4 x i8> <i8 3, i8 3, i8 3, i8 3>)
   ret { <4 x i8>, <4 x i1> } %x
@@ -781,7 +781,7 @@ define i8 @smul_neg1(i8 %x, ptr %p) {
 define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @smul_neg1_vec(
 ; CHECK-NEXT:    [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp eq <4 x i8> [[X]], <i8 -128, i8 -128, i8 -128, i8 -128>
+; CHECK-NEXT:    [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
@@ -795,7 +795,7 @@ define <4 x i8> @smul_neg1_vec(<4 x i8> %x, ptr %p) {
 define <4 x i8> @smul_neg1_vec_poison(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @smul_neg1_vec_poison(
 ; CHECK-NEXT:    [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp eq <4 x i8> [[X]], <i8 -128, i8 -128, i8 -128, i8 -128>
+; CHECK-NEXT:    [[OV:%.*]] = icmp eq <4 x i8> [[X]], splat (i8 -128)
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
@@ -838,7 +838,7 @@ define i8 @umul_neg1(i8 %x, ptr %p) {
 define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @umul_neg1_vec(
 ; CHECK-NEXT:    [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1)
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
@@ -852,7 +852,7 @@ define <4 x i8> @umul_neg1_vec(<4 x i8> %x, ptr %p) {
 define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @umul_neg1_vec_poison(
 ; CHECK-NEXT:    [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 1)
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
@@ -865,8 +865,8 @@ define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, ptr %p) {
 
 define <4 x i1> @smul_not_neg1_vec(<4 x i8> %x) {
 ; CHECK-LABEL: @smul_not_neg1_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], <i8 -43, i8 -43, i8 -43, i8 -43>
-; CHECK-NEXT:    [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], <i8 -85, i8 -85, i8 -85, i8 -85>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], splat (i8 -43)
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], splat (i8 -85)
 ; CHECK-NEXT:    ret <4 x i1> [[OV]]
 ;
   %m = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> <i8 -3, i8 -3, i8 poison, i8 -3>)
@@ -945,8 +945,8 @@ define i8 @umul_256(i8 %x, ptr %p) {
 
 define <4 x i8> @umul_4_vec_poison(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @umul_4_vec_poison(
-; CHECK-NEXT:    [[R:%.*]] = shl <4 x i8> [[X:%.*]], <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], <i8 63, i8 63, i8 63, i8 63>
+; CHECK-NEXT:    [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 2)
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt <4 x i8> [[X]], splat (i8 63)
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
 ;
@@ -1035,8 +1035,8 @@ define i8 @smul_128(i8 %x, ptr %p) {
 
 define <4 x i8> @smul_2_vec_poison(<4 x i8> %x, ptr %p) {
 ; CHECK-LABEL: @smul_2_vec_poison(
-; CHECK-NEXT:    [[R:%.*]] = shl <4 x i8> [[X:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[X]], <i8 64, i8 64, i8 64, i8 64>
+; CHECK-NEXT:    [[R:%.*]] = shl <4 x i8> [[X:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[X]], splat (i8 64)
 ; CHECK-NEXT:    [[OV:%.*]] = icmp slt <4 x i8> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    store <4 x i1> [[OV]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x i8> [[R]]
diff --git a/llvm/test/Transforms/InstCombine/xor-ashr.ll b/llvm/test/Transforms/InstCombine/xor-ashr.ll
index 6f501184b63574..0c0554adcf1230 100644
--- a/llvm/test/Transforms/InstCombine/xor-ashr.ll
+++ b/llvm/test/Transforms/InstCombine/xor-ashr.ll
@@ -53,8 +53,8 @@ define i128 @testi128i128(i128 %add) {
 
 define <4 x i8> @testv4i16i8(<4 x i16> %add) {
 ; CHECK-LABEL: @testv4i16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1>
-; CHECK-NEXT:    [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> <i8 27, i8 27, i8 27, i8 27>, <4 x i8> <i8 -28, i8 -28, i8 -28, i8 -28>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1)
+; CHECK-NEXT:    [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> splat (i8 27), <4 x i8> splat (i8 -28)
 ; CHECK-NEXT:    ret <4 x i8> [[X]]
 ;
   %sh = ashr <4 x i16> %add, <i16 15, i16 15, i16 15, i16 15>
@@ -65,7 +65,7 @@ define <4 x i8> @testv4i16i8(<4 x i16> %add) {
 
 define <4 x i8> @testv4i16i8_poison(<4 x i16> %add) {
 ; CHECK-LABEL: @testv4i16i8_poison(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[ADD:%.*]], splat (i16 -1)
 ; CHECK-NEXT:    [[X:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> <i8 27, i8 27, i8 poison, i8 27>, <4 x i8> <i8 -28, i8 -28, i8 poison, i8 -28>
 ; CHECK-NEXT:    ret <4 x i8> [[X]]
 ;
@@ -94,7 +94,7 @@ define i8 @wrongimm(i16 %add) {
 define <4 x i32> @vectorpoison(<6 x i32> %0) {
 ; CHECK-LABEL: @vectorpoison(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <6 x i32> [[TMP0:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[SHR:%.*]] = sext <6 x i1> [[ISNOTNEG]] to <6 x i32>
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <6 x i32> [[SHR]], <6 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 2>
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
diff --git a/llvm/test/Transforms/InstCombine/xor.ll b/llvm/test/Transforms/InstCombine/xor.ll
index 8308dc503d4da2..4b6a5b71948b04 100644
--- a/llvm/test/Transforms/InstCombine/xor.ll
+++ b/llvm/test/Transforms/InstCombine/xor.ll
@@ -111,7 +111,7 @@ define i1 @test9(i8 %A) {
 
 define <2 x i1> @test9vec(<2 x i8> %a) {
 ; CHECK-LABEL: @test9vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], <i8 89, i8 89>
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i8> [[A:%.*]], splat (i8 89)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = xor <2 x i8> %a, <i8 123, i8 123>
@@ -153,7 +153,7 @@ define i1 @test12(i8 %A) {
 
 define <2 x i1> @test12vec(<2 x i8> %a) {
 ; CHECK-LABEL: @test12vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], <i8 4, i8 4>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i8> [[A:%.*]], splat (i8 4)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %b = xor <2 x i8> %a, <i8 4, i8 4>
@@ -222,7 +222,7 @@ define i32 @fold_zext_xor_sandwich(i1 %X) {
 define <2 x i32> @fold_zext_xor_sandwich_vec(<2 x i1> %X) {
 ; CHECK-LABEL: @fold_zext_xor_sandwich_vec(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[Q:%.*]] = xor <2 x i32> [[Z]], <i32 3, i32 3>
+; CHECK-NEXT:    [[Q:%.*]] = xor <2 x i32> [[Z]], splat (i32 3)
 ; CHECK-NEXT:    ret <2 x i32> [[Q]]
 ;
   %Y = xor <2 x i1> %X, <i1 true, i1 true>
@@ -287,7 +287,7 @@ define i32 @test28(i32 %indvar) {
 
 define <2 x i32> @test28vec(<2 x i32> %indvar) {
 ; CHECK-LABEL: @test28vec(
-; CHECK-NEXT:    [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[T214:%.*]] = add <2 x i32> [[INDVAR:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[T214]]
 ;
   %t7 = add <2 x i32> %indvar, <i32 -2147483647, i32 -2147483647>
@@ -307,7 +307,7 @@ define i32 @test28_sub(i32 %indvar) {
 
 define <2 x i32> @test28_subvec(<2 x i32> %indvar) {
 ; CHECK-LABEL: @test28_subvec(
-; CHECK-NEXT:    [[T214:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[INDVAR:%.*]]
+; CHECK-NEXT:    [[T214:%.*]] = sub <2 x i32> splat (i32 1), [[INDVAR:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[T214]]
 ;
   %t7 = sub <2 x i32> <i32 -2147483647, i32 -2147483647>, %indvar
@@ -327,7 +327,7 @@ define i32 @test29(i1 %C) {
 
 define <2 x i32> @test29vec(i1 %C) {
 ; CHECK-LABEL: @test29vec(
-; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> <i32 915, i32 915>, <2 x i32> <i32 113, i32 113>
+; CHECK-NEXT:    [[V:%.*]] = select i1 [[C:%.*]], <2 x i32> splat (i32 915), <2 x i32> splat (i32 113)
 ; CHECK-NEXT:    ret <2 x i32> [[V]]
 ;
   %A = select i1 %C, <2 x i32> <i32 1000, i32 1000>, <2 x i32> <i32 10, i32 10>
@@ -374,7 +374,7 @@ define <2 x i32> @test30vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 915, i32 915>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ splat (i32 915), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[A]]
 ;
 entry:
@@ -759,7 +759,7 @@ define i32 @test45(i32 %x, i32 %y) {
 ; Check that we work with splat vectors also.
 define <4 x i32> @test46(<4 x i32> %x) {
 ; CHECK-LABEL: @test46(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[X:%.*]], <4 x i32> splat (i32 255))
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %1 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -803,8 +803,8 @@ define i32 @test48(i32 %x) {
 
 define <2 x i32> @test48vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test48vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 1, i32 1>
-; CHECK-NEXT:    [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], splat (i32 1)
+; CHECK-NEXT:    [[D:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> splat (i32 -1))
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
   %a = sub <2 x i32> <i32 -2, i32 -2>, %x
@@ -829,7 +829,7 @@ define i32 @test49(i32 %x) {
 
 define <2 x i32> @test49vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test49vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> zeroinitializer)
 ; CHECK-NEXT:    ret <2 x i32> [[D]]
 ;
@@ -857,8 +857,8 @@ define i32 @test50(i32 %x, i32 %y) {
 
 define <2 x i32> @test50vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test50vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> <i32 1, i32 1>, [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> splat (i32 1), [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[E:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
@@ -887,8 +887,8 @@ define i32 @test51(i32 %x, i32 %y) {
 
 define <2 x i32> @test51vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @test51vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> <i32 -3, i32 -3>, [[X:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], <i32 -3, i32 -3>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i32> splat (i32 -3), [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[Y:%.*]], splat (i32 -3)
 ; CHECK-NEXT:    [[E:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
 ; CHECK-NEXT:    ret <2 x i32> [[E]]
 ;
@@ -1011,8 +1011,8 @@ define i8 @not_shl(i8 %x) {
 
 define <2 x i8> @not_shl_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @not_shl_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i8> [[TMP1]], <i8 5, i8 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[X:%.*]], splat (i8 -1)
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i8> [[TMP1]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %a = shl <2 x i8> %x, <i8 5, i8 5>
@@ -1061,7 +1061,7 @@ define i8 @not_lshr(i8 %x) {
 
 define <2 x i8> @not_lshr_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @not_lshr_vec(
-; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = zext <2 x i1> [[ISNOTNEG]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1124,7 +1124,7 @@ define i8 @not_ashr(i8 %x) {
 
 define <2 x i8> @not_ashr_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @not_ashr_vec(
-; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[ISNOTNEG:%.*]] = icmp sgt <2 x i8> [[X:%.*]], splat (i8 -1)
 ; CHECK-NEXT:    [[R:%.*]] = sext <2 x i1> [[ISNOTNEG]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
@@ -1222,7 +1222,7 @@ define i32 @xor_andn_commute4(i32 %pa, i32 %pb) {
 define <2 x i64> @xor_orn(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: @xor_orn(
 ; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[Z:%.*]] = xor <2 x i64> [[TMP1]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[Z:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1)
 ; CHECK-NEXT:    ret <2 x i64> [[Z]]
 ;
   %nota = xor <2 x i64> %a, <i64 -1, i64 -1>
@@ -1350,7 +1350,7 @@ define i32 @ctlz_pow2(i32 %x) {
 
 define <2 x i8> @cttz_pow2(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @cttz_pow2(
-; CHECK-NEXT:    [[S:%.*]] = shl nuw <2 x i8> <i8 1, i8 1>, [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = shl nuw <2 x i8> splat (i8 1), [[X:%.*]]
 ; CHECK-NEXT:    [[D:%.*]] = udiv exact <2 x i8> [[S]], [[Y:%.*]]
 ; CHECK-NEXT:    [[R:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[D]], i1 true)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
@@ -1561,10 +1561,10 @@ entry:
 define <2 x i32> @select_or_disjoint_xor_vec(<2 x i32> %a, i1 %c) {
 ; CHECK-LABEL: @select_or_disjoint_xor_vec(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 4, i32 4>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4)
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]]
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[OR]], <i32 4, i32 4>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[OR]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i32> [[XOR]]
 ;
 entry:
@@ -1595,10 +1595,10 @@ entry:
 define <2 x i32> @select_or_disjoint_or_vec(<2 x i32> %a, i1 %c) {
 ; CHECK-LABEL: @select_or_disjoint_or_vec(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> <i32 4, i32 4>
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], <i32 4, i32 4>
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[C:%.*]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 4)
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], splat (i32 4)
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint <2 x i32> [[S]], [[SHL]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], <i32 4, i32 4>
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw <2 x i32> [[OR]], splat (i32 4)
 ; CHECK-NEXT:    ret <2 x i32> [[ADD]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll
index c9da18d3d88bdb..271c303664b720 100644
--- a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll
+++ b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll
@@ -73,7 +73,7 @@ define i32 @zext_add_scalar(i1 %x) {
 
 define <2 x i32> @zext_add_vec_splat(<2 x i1> %x) {
 ; CHECK-LABEL: @zext_add_vec_splat(
-; CHECK-NEXT:    [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> <i32 43, i32 43>, <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    [[ADD:%.*]] = select <2 x i1> [[X:%.*]], <2 x i32> splat (i32 43), <2 x i32> splat (i32 42)
 ; CHECK-NEXT:    ret <2 x i32> [[ADD]]
 ;
   %zext = zext <2 x i1> %x to <2 x i32>
diff --git a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
index 384ff8d2b7a3a3..97412d6ad5f8de 100644
--- a/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
+++ b/llvm/test/Transforms/InstCombine/zext-ctlz-trunc-to-ctlz-add.ll
@@ -28,7 +28,7 @@ define i16 @trunc_ctlz_zext_i16_i32(i16 %x) {
 define <2 x i8> @trunc_ctlz_zext_v2i8_v2i33(<2 x i8> %x) {
 ; CHECK-LABEL: @trunc_ctlz_zext_v2i8_v2i33(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], <i8 25, i8 25>
+; CHECK-NEXT:    [[ZZ:%.*]] = add nuw nsw <2 x i8> [[TMP1]], splat (i8 25)
 ; CHECK-NEXT:    ret <2 x i8> [[ZZ]]
 ;
   %z = zext <2 x i8> %x to <2 x i33>
diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 872871cf15b033..da71556cc01cd2 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -194,9 +194,9 @@ define i32 @masked_bit_set(i32 %x, i32 %y) {
 
 define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @masked_bit_clear(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %sh1 = shl <2 x i32> <i32 1, i32 1>, %y
@@ -210,7 +210,7 @@ define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) {
 ; CHECK-LABEL: @masked_bit_set_commute(
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> <i32 42, i32 3>, [[PX:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[R:%.*]] = and <2 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %x = srem <2 x i32> <i32 42, i32 3>, %px ; thwart complexity-based canonicalization
@@ -471,10 +471,10 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
 
 define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
-; CHECK-NEXT:    [[Z:%.*]] = and <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[Z:%.*]] = and <4 x i16> [[TMP3]], splat (i16 1)
 ; CHECK-NEXT:    ret <4 x i16> [[Z]]
 ;
   %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b
@@ -550,9 +550,9 @@ define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
 
 define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    [[Z:%.*]] = zext nneg <4 x i32> [[TMP3]] to <4 x i64>
 ; CHECK-NEXT:    ret <4 x i64> [[Z]]
 ;
@@ -576,7 +576,7 @@ define i32 @notneg_zext_wider(i8 %x) {
 
 define <2 x i8> @notneg_zext_narrower(<2 x i32> %x) {
 ; CHECK-LABEL: @notneg_zext_narrower(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i32> [[X:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i8>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
index a296aca65bbf68..9c364d16badee7 100644
--- a/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
+++ b/llvm/test/Transforms/InstSimplify/2011-09-05-InsertExtractValue.ll
@@ -7,12 +7,12 @@ define void @test1() personality ptr @__gxx_personality_v0 {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    invoke void @bar()
-; CHECK-NEXT:    to label [[CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK-NEXT:            to label [[CONT:%.*]] unwind label [[LPAD:%.*]]
 ; CHECK:       cont:
 ; CHECK-NEXT:    ret void
 ; CHECK:       lpad:
 ; CHECK-NEXT:    [[EX:%.*]] = landingpad { ptr, i32 }
-; CHECK-NEXT:    cleanup
+; CHECK-NEXT:            cleanup
 ; CHECK-NEXT:    resume { ptr, i32 } [[EX]]
 ;
 entry:
@@ -53,7 +53,7 @@ define i32 @test3(i32 %a, float %b) {
 
 define i8 @test4(<8 x i8> %V) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> <double 0x319BEB8FD172E36> to <8 x i8>)
+; CHECK-NEXT:    [[ADD:%.*]] = add <8 x i8> [[V:%.*]], bitcast (<1 x double> splat (double 0x319BEB8FD172E36) to <8 x i8>)
 ; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <8 x i8> [[ADD]], i32 6
 ; CHECK-NEXT:    ret i8 [[EXTRACT]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
index 2e3a6052242038..3b701179aa65a3 100644
--- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll
@@ -492,7 +492,7 @@ define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) {
 ; CHECK-LABEL: @and_of_different_cast_icmps_vec(
 ; CHECK-NEXT:    [[CMP0:%.*]] = icmp eq <2 x i8> [[I:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3>
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], <i16 1, i16 1>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt <2 x i16> [[J:%.*]], splat (i16 1)
 ; CHECK-NEXT:    [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3>
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]]
 ; CHECK-NEXT:    ret <2 x i3> [[AND]]
@@ -680,7 +680,7 @@ define i3 @or_xorn_and_commute1(i3 %a, i3 %b) {
 
 define <2 x i32> @or_xorn_and_commute2(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @or_xorn_and_commute2(
-; CHECK-NEXT:    [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1)
 ; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[B:%.*]], [[NEGA]]
 ; CHECK-NEXT:    ret <2 x i32> [[XOR]]
 ;
@@ -800,7 +800,7 @@ define i8 @lshr_perfect_mask(i8 %x) {
 
 define <2 x i8> @lshr_oversized_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_oversized_mask_splat(
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 5, i8 5>
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[X:%.*]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sh = lshr <2 x i8> %x, <i8 5, i8 5>
@@ -821,7 +821,7 @@ define i8 @lshr_undersized_mask(i8 %x) {
 
 define <2 x i8> @shl_perfect_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_perfect_mask_splat(
-; CHECK-NEXT:    [[SH:%.*]] = shl <2 x i8> [[X:%.*]], <i8 6, i8 6>
+; CHECK-NEXT:    [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6)
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %sh = shl <2 x i8> %x, <i8 6, i8 6>
@@ -841,8 +841,8 @@ define i8 @shl_oversized_mask(i8 %x) {
 
 define <2 x i8> @shl_undersized_mask_splat(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_undersized_mask_splat(
-; CHECK-NEXT:    [[SH:%.*]] = shl <2 x i8> [[X:%.*]], <i8 6, i8 6>
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i8> [[SH]], <i8 -120, i8 -120>
+; CHECK-NEXT:    [[SH:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 6)
+; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i8> [[SH]], splat (i8 -120)
 ; CHECK-NEXT:    ret <2 x i8> [[MASK]]
 ;
   %sh = shl <2 x i8> %x, <i8 6, i8 6>
@@ -998,7 +998,7 @@ define <2 x i64> @shl_or_and1v(<2 x i32> %a, <2 x i1> %b) {
 define <2 x i64> @shl_or_and2v(<2 x i32> %a, <2 x i1> %b) {
 ; CHECK-LABEL: @shl_or_and2v(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i64> [[T1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 32)
 ; CHECK-NEXT:    ret <2 x i64> [[T3]]
 ;
   %t1 = zext <2 x i1> %b to <2 x i64>
@@ -1014,9 +1014,9 @@ define <2 x i32> @shl_or_and3v(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-LABEL: @shl_or_and3v(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32>
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i16> [[B:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i32> [[T1]], <i32 16, i32 16>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i32> [[T1]], splat (i32 16)
 ; CHECK-NEXT:    [[T4:%.*]] = or <2 x i32> [[T2]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], <i32 -65535, i32 -65535>
+; CHECK-NEXT:    [[T5:%.*]] = and <2 x i32> [[T4]], splat (i32 -65535)
 ; CHECK-NEXT:    ret <2 x i32> [[T5]]
 ;
   %t1 = zext <2 x i16> %a to <2 x i32>
@@ -1059,7 +1059,7 @@ define i89 @or_add_sub(i89 %x) {
 
 define <3 x i8> @or_sub_add(<3 x i8> %x) {
 ; CHECK-LABEL: @or_sub_add(
-; CHECK-NEXT:    ret <3 x i8> <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <3 x i8> splat (i8 -1)
 ;
   %a = add <3 x i8> %x, <i8 42, i8 -12, i8 0>
   %s = sub <3 x i8> <i8 -43, i8 11, i8 -1>, %x
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll
index 9c75b66db50cc7..a21124eaad6c57 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/bitcast.ll
@@ -4,7 +4,7 @@
 
 define <1 x i64> @test1() {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT:    ret <1 x i64> <i64 63>
+; CHECK-NEXT:    ret <1 x i64> splat (i64 63)
 ;
   %A = bitcast i64 63 to <1 x i64>
   ret <1 x i64> %A
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
index a2ed8cd6178d86..b51f061d7918c3 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/cast.ll
@@ -57,7 +57,7 @@ define float @nan_f64_trunc() {
 
 define <3 x half> @nan_v3f64_trunc() {
 ; CHECK-LABEL: @nan_v3f64_trunc(
-; CHECK-NEXT:    ret <3 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00>
+; CHECK-NEXT:    ret <3 x half> splat (half 0xH7E00)
 ;
   %f = fptrunc <3 x double> <double 0x7FF0020000000000, double 0x7FF003FFFFFFFFFF, double 0x7FF8000000000001> to <3 x half>
   ret <3 x half> %f
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll
index deeb238bdd0e4e..fde98c43e7687d 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/saturating-add-sub.ll
@@ -61,7 +61,7 @@ define <2 x i8> @test_uadd_vector_no_sat(<2 x i8> %a) {
 
 define <2 x i8> @test_uadd_vector_sat(<2 x i8> %a) {
 ; CHECK-LABEL: @test_uadd_vector_sat(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 100, i8 200>, <2 x i8> <i8 250, i8 100>)
   ret <2 x i8> %x
@@ -77,7 +77,7 @@ define <2 x i8> @test_sadd_vector_no_sat(<2 x i8> %a) {
 
 define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) {
 ; CHECK-LABEL: @test_sadd_vector_sat_pos(
-; CHECK-NEXT:    ret <2 x i8> <i8 127, i8 127>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 127)
 ;
   %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 100, i8 10>, <2 x i8> <i8 30, i8 120>)
   ret <2 x i8> %x
@@ -85,7 +85,7 @@ define <2 x i8> @test_sadd_vector_sat_pos(<2 x i8> %a) {
 
 define <2 x i8> @test_sadd_vector_sat_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_sadd_vector_sat_neg(
-; CHECK-NEXT:    ret <2 x i8> <i8 -128, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -128)
 ;
   %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 -100, i8 -10>, <2 x i8> <i8 -30, i8 -120>)
   ret <2 x i8> %x
@@ -157,7 +157,7 @@ define <2 x i8> @test_ssub_vector_no_sat(<2 x i8> %a) {
 
 define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) {
 ; CHECK-LABEL: @test_ssub_vector_sat_pos(
-; CHECK-NEXT:    ret <2 x i8> <i8 127, i8 127>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 127)
 ;
   %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> <i8 100, i8 10>, <2 x i8> <i8 -30, i8 -120>)
   ret <2 x i8> %x
@@ -165,7 +165,7 @@ define <2 x i8> @test_ssub_vector_sat_pos(<2 x i8> %a) {
 
 define <2 x i8> @test_ssub_vector_sat_neg(<2 x i8> %a) {
 ; CHECK-LABEL: @test_ssub_vector_sat_neg(
-; CHECK-NEXT:    ret <2 x i8> <i8 -128, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -128)
 ;
   %x = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> <i8 -100, i8 -10>, <2 x i8> <i8 30, i8 120>)
   ret <2 x i8> %x
@@ -271,7 +271,7 @@ define <2 x i8> @test_ssub_vector_both_undef_splat() {
 
 define <2 x i8> @test_uadd_vector_op2_undef_splat() {
 ; CHECK-LABEL: @test_uadd_vector_op2_undef_splat(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 10, i8 20>, <2 x i8> undef)
   ret <2 x i8> %x
@@ -279,7 +279,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_splat() {
 
 define <2 x i8> @test_sadd_vector_op1_undef_splat() {
 ; CHECK-LABEL: @test_sadd_vector_op1_undef_splat(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> <i8 10, i8 20>)
   ret <2 x i8> %x
@@ -311,7 +311,7 @@ define <2 x i8> @test_uadd_vector_op2_undef_mix1() {
 
 define <2 x i8> @test_uadd_vector_op2_undef_mix2() {
 ; CHECK-LABEL: @test_uadd_vector_op2_undef_mix2(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 10, i8 undef>, <2 x i8> <i8 undef, i8 20>)
   ret <2 x i8> %x
@@ -327,7 +327,7 @@ define <2 x i8> @test_sadd_vector_op1_undef_mix1() {
 
 define <2 x i8> @test_sadd_vector_op1_undef_mix2() {
 ; CHECK-LABEL: @test_sadd_vector_op1_undef_mix2(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 undef, i8 10>, <2 x i8> <i8 20, i8 undef>)
   ret <2 x i8> %x
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll
index 2be619828e9ea1..4aed1c5dd7d2a1 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/vectorgep-crash.ll
@@ -64,7 +64,7 @@ define <2 x ptr> @constant_undef_index() {
 
 define <2 x ptr> @constant_inbounds() {
 ; CHECK-LABEL: define <2 x ptr> @constant_inbounds() {
-; CHECK-NEXT:    ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    ret <2 x ptr> getelementptr (i8, ptr @g, <2 x i64> splat (i64 1))
 ;
   %gep = getelementptr i8, ptr @g, <2 x i64> <i64 1, i64 1>
   ret <2 x ptr> %gep
diff --git a/llvm/test/Transforms/InstSimplify/add_vp.ll b/llvm/test/Transforms/InstSimplify/add_vp.ll
index 327d45c4f24cc5..320f3fbb303797 100644
--- a/llvm/test/Transforms/InstSimplify/add_vp.ll
+++ b/llvm/test/Transforms/InstSimplify/add_vp.ll
@@ -10,7 +10,7 @@ declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
 ; Constant folding should just work.
 define <2 x i32> @constant_vp_add(<2 x i1> %mask, i32 %evl) {
 ; CHECK-LABEL: @constant_vp_add(
-; CHECK-NEXT:    [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> <i32 3, i32 3>, <2 x i32> <i32 7, i32 7>, <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; CHECK-NEXT:    [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> splat (i32 3), <2 x i32> splat (i32 7), <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; CHECK-NEXT:    ret <2 x i32> [[Q]]
 ;
   %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> <i32 3, i32 3>, <2 x i32> <i32 7, i32 7>, <2 x i1> %mask, i32 %evl)
diff --git a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
index 2e75a0d2c98abf..3f1672d66abf0d 100644
--- a/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
+++ b/llvm/test/Transforms/InstSimplify/bitcast-vector-fold.ll
@@ -68,11 +68,8 @@ define <4 x i32> @test8(<1 x i64> %y) {
 }
 
 define <4 x i32> @test9(<1 x i64> %y) {
-; CONSTVEC-LABEL: @test9(
-; CONSTVEC-NEXT:    ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-;
-; CONSTSPLAT-LABEL: @test9(
-; CONSTSPLAT-NEXT:    ret <4 x i32> splat (i32 -1)
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    ret <4 x i32> splat (i32 -1)
 ;
   %c = bitcast <2 x i64> <i64 -1, i64 -1> to <4 x i32>
   ret <4 x i32> %c
@@ -80,7 +77,7 @@ define <4 x i32> @test9(<1 x i64> %y) {
 
 define <1 x i1> @test10() {
 ; CONSTVEC-LABEL: @test10(
-; CONSTVEC-NEXT:    [[RET:%.*]] = icmp eq <1 x i64> <i64 bitcast (<1 x double> <double 0xFFFFFFFFFFFFFFFF> to i64)>, zeroinitializer
+; CONSTVEC-NEXT:    [[RET:%.*]] = icmp eq <1 x i64> <i64 bitcast (<1 x double> splat (double 0xFFFFFFFFFFFFFFFF) to i64)>, zeroinitializer
 ; CONSTVEC-NEXT:    ret <1 x i1> [[RET]]
 ;
 ; CONSTSPLAT-LABEL: @test10(
@@ -93,11 +90,8 @@ define <1 x i1> @test10() {
 
 ; from MultiSource/Benchmarks/Bullet
 define <2 x float> @foo() {
-; CONSTVEC-LABEL: @foo(
-; CONSTVEC-NEXT:    ret <2 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>
-;
-; CONSTSPLAT-LABEL: @foo(
-; CONSTSPLAT-NEXT:    ret <2 x float> splat (float 0xFFFFFFFFE0000000)
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:    ret <2 x float> splat (float 0xFFFFFFFFE0000000)
 ;
   %cast = bitcast i64 -1 to <2 x float>
   ret <2 x float> %cast
@@ -105,22 +99,16 @@ define <2 x float> @foo() {
 
 
 define <2 x double> @foo2() {
-; CONSTVEC-LABEL: @foo2(
-; CONSTVEC-NEXT:    ret <2 x double> <double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF>
-;
-; CONSTSPLAT-LABEL: @foo2(
-; CONSTSPLAT-NEXT:    ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF)
+; CHECK-LABEL: @foo2(
+; CHECK-NEXT:    ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF)
 ;
   %cast = bitcast i128 -1 to <2 x double>
   ret <2 x double> %cast
 }
 
 define <1 x float> @foo3() {
-; CONSTVEC-LABEL: @foo3(
-; CONSTVEC-NEXT:    ret <1 x float> <float 0xFFFFFFFFE0000000>
-;
-; CONSTSPLAT-LABEL: @foo3(
-; CONSTSPLAT-NEXT:    ret <1 x float> splat (float 0xFFFFFFFFE0000000)
+; CHECK-LABEL: @foo3(
+; CHECK-NEXT:    ret <1 x float> splat (float 0xFFFFFFFFE0000000)
 ;
   %cast = bitcast i32 -1 to <1 x float>
   ret <1 x float> %cast
@@ -143,11 +131,8 @@ define double @foo5() {
 }
 
 define <2 x double> @foo6() {
-; CONSTVEC-LABEL: @foo6(
-; CONSTVEC-NEXT:    ret <2 x double> <double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF>
-;
-; CONSTSPLAT-LABEL: @foo6(
-; CONSTSPLAT-NEXT:    ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF)
+; CHECK-LABEL: @foo6(
+; CHECK-NEXT:    ret <2 x double> splat (double 0xFFFFFFFFFFFFFFFF)
 ;
   %cast = bitcast <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1> to <2 x double>
   ret <2 x double> %cast
@@ -299,7 +284,7 @@ define <16 x i8> @bitcast_constexpr_16i8_8i16_u256uuu256uu() {
 
 define <1 x i32> @bitcast_constexpr_scalar_fp_to_vector_int() {
 ; CONSTVEC-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
-; CONSTVEC-NEXT:    ret <1 x i32> <i32 1065353216>
+; CONSTVEC-NEXT:    ret <1 x i32> splat (i32 1065353216)
 ;
 ; CONSTSPLAT-LABEL: @bitcast_constexpr_scalar_fp_to_vector_int(
 ; CONSTSPLAT-NEXT:    ret <1 x i32> bitcast (<1 x float> splat (float 1.000000e+00) to <1 x i32>)
diff --git a/llvm/test/Transforms/InstSimplify/canonicalize.ll b/llvm/test/Transforms/InstSimplify/canonicalize.ll
index c3877ec0cef8d1..9d2bdd1b853e61 100644
--- a/llvm/test/Transforms/InstSimplify/canonicalize.ll
+++ b/llvm/test/Transforms/InstSimplify/canonicalize.ll
@@ -27,7 +27,7 @@ define <2 x float> @canonicalize_zero_vector() {
 
 define <2 x float> @canonicalize_negzero_vector() {
 ; CHECK-LABEL: @canonicalize_negzero_vector(
-; CHECK-NEXT:    ret <2 x float> <float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float -0.000000e+00)
 ;
   %ret = call <2 x float> @llvm.canonicalize.v2f32(<2 x float> <float -0.0, float -0.0>)
   ret <2 x float> %ret
diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll
index 8077f5ad5c6737..742e8c74a89481 100644
--- a/llvm/test/Transforms/InstSimplify/compare.ll
+++ b/llvm/test/Transforms/InstSimplify/compare.ll
@@ -1965,7 +1965,7 @@ define i1 @shl_pow2_cmp_ne_zero(i32 %x) {
 
 define <2 x i1> @shl_pow2_cmp_ne_zero_splat(<2 x i32> %x) {
 ; CHECK-LABEL: @shl_pow2_cmp_ne_zero_splat(
-; CHECK-NEXT:    [[S:%.*]] = shl <2 x i32> <i32 16, i32 16>, [[X:%.*]]
+; CHECK-NEXT:    [[S:%.*]] = shl <2 x i32> splat (i32 16), [[X:%.*]]
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[S]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll
index 92d6cc30d6248e..34fa05856b22db 100644
--- a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll
+++ b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll
@@ -57,7 +57,7 @@ define i8 @knownbits_allones(i8 %x, i8 %y) {
 
 define <2 x i8> @add_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @add_vec(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %ret = add nuw <2 x i8> %x, <i8 -1, i8 -1>
   ret <2 x i8> %ret
diff --git a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll
index 3f4a08807a4b41..1ea9f7fb8e46d0 100644
--- a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll
+++ b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll
@@ -72,7 +72,7 @@ define i8 @knownbits_negativeorzero(i8 %x, i8 %y) {
 
 define <2 x i8> @shl_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @shl_vec(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %ret = shl nuw <2 x i8> <i8 -1, i8 -1>, %x
   ret <2 x i8> %ret
diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll
index 48cc8895aebbca..92e43654806a4b 100644
--- a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll
+++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll
@@ -121,7 +121,7 @@ define i64 @ctpop_x_and_negx_nz(i64 %x) {
 
 define <2 x i32> @ctpop_shl1_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_shl1_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %shl = shl <2 x i32> <i32 1 ,i32 1>, %x
   %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shl)
@@ -141,7 +141,7 @@ define <2 x i32> @ctpop_shl2_1_vec(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_lshr_intmin_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_lshr_intmin_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 1)
 ;
   %shr = lshr <2 x i32> <i32 2147483648 ,i32 2147483648>, %x
   %cnt = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %shr)
@@ -162,7 +162,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_lshr_intmin_intmin_plus1_vec_nz(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[SHR:%.*]] = lshr <2 x i32> <i32 -2147483648, i32 -2147483647>, [[X1]]
 ; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHR]])
 ; CHECK-NEXT:    ret <2 x i32> [[CNT]]
@@ -176,7 +176,7 @@ define <2 x i32> @ctpop_lshr_intmin_intmin_plus1_vec_nz(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_shl2_1_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_shl2_1_vec_nz(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 15, i32 15>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 15)
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> <i32 2, i32 1>, [[AND]]
 ; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[SHL]])
 ; CHECK-NEXT:    ret <2 x i32> [[CNT]]
@@ -202,7 +202,7 @@ define <2 x i32> @ctpop_x_and_negx_vec(<2 x i32> %x) {
 
 define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) {
 ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz(
-; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[X1:%.*]] = or <2 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[X1]]
 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[SUB]], [[X]]
 ; CHECK-NEXT:    [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[AND]])
diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll
index 5ca2e8837b924b..ef982673dce594 100644
--- a/llvm/test/Transforms/InstSimplify/div.ll
+++ b/llvm/test/Transforms/InstSimplify/div.ll
@@ -398,8 +398,8 @@ define <2 x i8> @udiv_exact_trailing_zeros(<2 x i8> %x) {
 
 define <2 x i8> @udiv_exact_trailing_zeros_eq(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_exact_trailing_zeros_eq(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 28, i8 28>
-; CHECK-NEXT:    [[R:%.*]] = udiv exact <2 x i8> [[O]], <i8 12, i8 12>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 28)
+; CHECK-NEXT:    [[R:%.*]] = udiv exact <2 x i8> [[O]], splat (i8 12)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %o = or <2 x i8> %x, <i8 28, i8 28>
@@ -424,7 +424,7 @@ define i8 @udiv_trailing_zeros(i8 %x) {
 
 define <2 x i8> @udiv_exact_trailing_zeros_nonuniform_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_exact_trailing_zeros_nonuniform_vector(
-; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], <i8 3, i8 3>
+; CHECK-NEXT:    [[O:%.*]] = or <2 x i8> [[X:%.*]], splat (i8 3)
 ; CHECK-NEXT:    [[R:%.*]] = udiv exact <2 x i8> [[O]], <i8 12, i8 1>
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/exp10.ll b/llvm/test/Transforms/InstSimplify/exp10.ll
index 11617ceec666d0..a546bb1255d854 100644
--- a/llvm/test/Transforms/InstSimplify/exp10.ll
+++ b/llvm/test/Transforms/InstSimplify/exp10.ll
@@ -9,8 +9,8 @@ declare <vscale x 2 x float> @llvm.exp10.nxv2f32(<vscale x 2 x float>)
 
 
 define float @exp10_exp10(float %x) {
-; CHECK-LABEL: define float @exp10_exp10
-; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-LABEL: define float @exp10_exp10(
+; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[EXP100:%.*]] = call float @llvm.exp10.f32(float [[X]])
 ; CHECK-NEXT:    [[EXP101:%.*]] = call float @llvm.exp10.f32(float [[EXP100]])
 ; CHECK-NEXT:    ret float [[EXP101]]
@@ -21,8 +21,8 @@ define float @exp10_exp10(float %x) {
 }
 
 define <2 x float> @exp10_exp10_vector(<2 x float> %x) {
-; CHECK-LABEL: define <2 x float> @exp10_exp10_vector
-; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-LABEL: define <2 x float> @exp10_exp10_vector(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:    [[EXP100:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[X]])
 ; CHECK-NEXT:    [[EXP101:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[EXP100]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP101]]
@@ -33,8 +33,8 @@ define <2 x float> @exp10_exp10_vector(<2 x float> %x) {
 }
 
 define float @exp10_exp10_const(float %x) {
-; CHECK-LABEL: define float @exp10_exp10_const
-; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-LABEL: define float @exp10_exp10_const(
+; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[EXP101:%.*]] = call float @llvm.exp10.f32(float 0x7FF0000000000000)
 ; CHECK-NEXT:    ret float [[EXP101]]
 ;
@@ -44,8 +44,8 @@ define float @exp10_exp10_const(float %x) {
 }
 
 define <vscale x 2 x float> @exp10_exp10_scalable_vector(<vscale x 2 x float> %x) {
-; CHECK-LABEL: define <vscale x 2 x float> @exp10_exp10_scalable_vector
-; CHECK-SAME: (<vscale x 2 x float> [[X:%.*]]) {
+; CHECK-LABEL: define <vscale x 2 x float> @exp10_exp10_scalable_vector(
+; CHECK-SAME: <vscale x 2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:    [[EXP100:%.*]] = call <vscale x 2 x float> @llvm.exp10.nxv2f32(<vscale x 2 x float> [[X]])
 ; CHECK-NEXT:    [[EXP101:%.*]] = call <vscale x 2 x float> @llvm.exp10.nxv2f32(<vscale x 2 x float> [[EXP100]])
 ; CHECK-NEXT:    ret <vscale x 2 x float> [[EXP101]]
@@ -101,7 +101,7 @@ define <2 x float> @exp10_undef_vector() {
 
 define <2 x float> @exp10_zero_vector() {
 ; CHECK-LABEL: define <2 x float> @exp10_zero_vector() {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> zeroinitializer)
   ret <2 x float> %ret
@@ -118,7 +118,7 @@ define <vscale x 2 x float> @exp10_zero_scalable_vector() {
 
 define <2 x float> @exp10_zero_negzero_vector() {
 ; CHECK-LABEL: define <2 x float> @exp10_zero_negzero_vector() {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+00)
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0.0, float -0.0>)
   ret <2 x float> %ret
@@ -262,7 +262,7 @@ define ppc_fp128 @canonicalize_noncanonical_zero_1_ppcf128() {
 
 define <2 x float> @exp10_splat_4() {
 ; CHECK-LABEL: define <2 x float> @exp10_splat_4() {
-; CHECK-NEXT:    ret <2 x float> <float 1.000000e+04, float 1.000000e+04>
+; CHECK-NEXT:    ret <2 x float> splat (float 1.000000e+04)
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 4.0, float 4.0>)
   ret <2 x float> %ret
@@ -270,7 +270,7 @@ define <2 x float> @exp10_splat_4() {
 
 define <2 x float> @exp10_splat_qnan() {
 ; CHECK-LABEL: define <2 x float> @exp10_splat_qnan() {
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF8000000000000))
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>)
@@ -279,7 +279,7 @@ define <2 x float> @exp10_splat_qnan() {
 
 define <2 x float> @exp10_splat_inf() {
 ; CHECK-LABEL: define <2 x float> @exp10_splat_inf() {
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0x7FF0000000000000))
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>)
@@ -288,7 +288,7 @@ define <2 x float> @exp10_splat_inf() {
 
 define <2 x float> @exp10_splat_neginf() {
 ; CHECK-LABEL: define <2 x float> @exp10_splat_neginf() {
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000>)
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> splat (float 0xFFF0000000000000))
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.exp10.v2f32(<2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000>)
diff --git a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll
index b1d772890aff83..963953ad2b3bc6 100644
--- a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll
+++ b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll
@@ -80,7 +80,7 @@ define float @fadd_unary_fnegx(float %x) #0 {
 
 define <2 x float> @fadd_binary_fnegx_commute_vec(<2 x float> %x) #0 {
 ; CHECK-LABEL: @fadd_binary_fnegx_commute_vec(
-; CHECK-NEXT:    [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
+; CHECK-NEXT:    [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
 ; CHECK-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[X]], <2 x float> [[NEGX]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
@@ -415,7 +415,7 @@ define double @frem_negzero_by_x(double %x) #0 {
 
 define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) #0 {
 ; CHECK-LABEL: @frem_negzero_by_x_vec_poison(
-; CHECK-NEXT:    ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double -0.000000e+00)
 ;
   %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> <double poison, double -0.0>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore")
   ret <2 x double> %r
diff --git a/llvm/test/Transforms/InstSimplify/fast-math.ll b/llvm/test/Transforms/InstSimplify/fast-math.ll
index 287f30b162f804..61bb5b976b98bb 100644
--- a/llvm/test/Transforms/InstSimplify/fast-math.ll
+++ b/llvm/test/Transforms/InstSimplify/fast-math.ll
@@ -397,7 +397,7 @@ define double @frem_negzero_by_x(double %x) {
 
 define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) {
 ; CHECK-LABEL: @frem_negzero_by_x_vec_poison(
-; CHECK-NEXT:    ret <2 x double> <double -0.000000e+00, double -0.000000e+00>
+; CHECK-NEXT:    ret <2 x double> splat (double -0.000000e+00)
 ;
   %r = frem nnan <2 x double> <double poison, double -0.0>, %x
   ret <2 x double> %r
@@ -469,7 +469,7 @@ define float @fdiv_neg_swapped2(float %f) {
 
 define <2 x float> @fdiv_neg_vec_poison_elt(<2 x float> %f) {
 ; CHECK-LABEL: @fdiv_neg_vec_poison_elt(
-; CHECK-NEXT:    ret <2 x float> <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float -1.000000e+00)
 ;
   %neg = fsub <2 x float> <float 0.0, float poison>, %f
   %div = fdiv nnan <2 x float> %f, %neg
diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll
index fb59011b91d5bd..91f9b556d66250 100644
--- a/llvm/test/Transforms/InstSimplify/fdiv.ll
+++ b/llvm/test/Transforms/InstSimplify/fdiv.ll
@@ -148,7 +148,7 @@ define <2 x float> @fdiv_nnan_nsz_ninf_by_zero_v2f32(<2 x float> %x) {
 
 define <2 x float> @fdiv_nnan_nsz_ninf_by_negzero_v2f32(<2 x float> %x) {
 ; CHECK-LABEL: @fdiv_nnan_nsz_ninf_by_negzero_v2f32(
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], <float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan nsz <2 x float> [[X:%.*]], splat (float -0.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[FDIV]]
 ;
   %fdiv = fdiv nnan nsz <2 x float> %x, <float -0.0, float -0.0>
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
index 32ea4cb7cd198d..9a078a8f569dab 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
@@ -15,7 +15,7 @@ define float @fsub_-0_x(float %a) #0 {
 
 define <2 x float> @fsub_-0_x_vec(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fsub_-0_x_vec(
-; CHECK-NEXT:    [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
+; CHECK-NEXT:    [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
 ; CHECK-NEXT:    [[RET:%.*]] = fneg <2 x float> [[T1]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
@@ -68,8 +68,8 @@ define float @fneg_x(float %a) #0 {
 
 define <2 x float> @fsub_-0_-0_x_vec(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fsub_-0_-0_x_vec(
-; CHECK-NEXT:    [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
+; CHECK-NEXT:    [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float -0.0>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore")
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index 7a35f09f03b995..d3178a103d42cd 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -221,7 +221,7 @@ define float @src_mul_nzero_neg(float nofpclass(inf nan pzero psub pnorm) %f) {
 
 define <2 x float> @src_mul_zero_neg(<2 x float> nofpclass(inf nan pzero psub pnorm) %f) {
 ; CHECK-LABEL: @src_mul_zero_neg(
-; CHECK-NEXT:    ret <2 x float> <float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <2 x float> splat (float -0.000000e+00)
 ;
   %r = fmul <2 x float> <float 0.0, float 0.0>, %f
   ret <2 x float> %r
@@ -372,7 +372,7 @@ define float @fabs_select_positive_constants(i32 %c) {
 define <2 x float> @fabs_select_positive_constants_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_positive_constants_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[SELECT]]
 ;
   %cmp = icmp eq i32 %c, 0
@@ -397,7 +397,7 @@ define float @fabs_select_constant_variable(i32 %c, float %x) {
 define <2 x float> @fabs_select_constant_variable_vector(i32 %c, <2 x float> %x) {
 ; CHECK-LABEL: @fabs_select_constant_variable_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> [[X:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> [[X:%.*]]
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -423,7 +423,7 @@ define float @fabs_select_neg0_pos0(i32 %c) {
 define <2 x float> @fabs_select_neg0_pos0_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_neg0_pos0_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> zeroinitializer
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> zeroinitializer
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -449,7 +449,7 @@ define float @fabs_select_neg0_neg1(i32 %c) {
 define <2 x float> @fabs_select_neg0_neg1_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_neg0_neg1_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float -0.000000e+00), <2 x float> splat (float -1.000000e+00)
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -474,7 +474,7 @@ define float @fabs_select_nan_nan(i32 %c) {
 define <2 x float> @fabs_select_nan_nan_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_nan_nan_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> <float 0x7FF8000100000000, float 0x7FF8000100000000>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0x7FF8000000000000), <2 x float> splat (float 0x7FF8000100000000)
 ; CHECK-NEXT:    ret <2 x float> [[SELECT]]
 ;
   %cmp = icmp eq i32 %c, 0
@@ -499,7 +499,7 @@ define float @fabs_select_negnan_nan(i32 %c) {
 define <2 x float> @fabs_select_negnan_nan_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_negnan_nan_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000>, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000000000000)
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -525,7 +525,7 @@ define float @fabs_select_negnan_negnan(i32 %c) {
 define <2 x float> @fabs_select_negnan_negnan_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_negnan_negnan_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000>, <2 x float> <float 0x7FF8000100000000, float 0x7FF8000100000000>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float 0x7FF8000100000000)
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -551,7 +551,7 @@ define float @fabs_select_negnan_negzero(i32 %c) {
 define <2 x float> @fabs_select_negnan_negzero_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_negnan_negzero_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000>, <2 x float> <float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> splat (float -0.000000e+00)
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -577,7 +577,7 @@ define float @fabs_select_negnan_zero(i32 %c) {
 define <2 x float> @fabs_select_negnan_zero_vector(i32 %c) {
 ; CHECK-LABEL: @fabs_select_negnan_zero_vector(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 0xFFF8000000000000, float 0xFFF8000000000000>, <2 x float> zeroinitializer
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 0xFFF8000000000000), <2 x float> zeroinitializer
 ; CHECK-NEXT:    [[FABS:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> [[SELECT]])
 ; CHECK-NEXT:    ret <2 x float> [[FABS]]
 ;
@@ -651,7 +651,7 @@ define float @fabs_sqrt_nnan_fabs(float %a) {
 define float @fabs_select_positive_constants_vector_extract(i32 %c) {
 ; CHECK-LABEL: @fabs_select_positive_constants_vector_extract(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C:%.*]], 0
-; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x float> <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], <2 x float> splat (float 1.000000e+00), <2 x float> splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[EXTRACT:%.*]] = extractelement <2 x float> [[SELECT]], i32 0
 ; CHECK-NEXT:    ret float [[EXTRACT]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
index 668a93ddf5a426..fff6cfd8a3b4bc 100644
--- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
+++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll
@@ -196,7 +196,7 @@ define <2 x float> @test_minnum_const_inf_nnan_comm_vec(<2 x float> %x) {
 
 define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) {
 ; CHECK-LABEL: @test_maxnum_const_inf_nnan_comm_vec(
-; CHECK-NEXT:    ret <2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    ret <2 x float> splat (float 0x7FF0000000000000)
 ;
   %r = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> <float 0x7ff0000000000000, float 0x7ff0000000000000>, <2 x float> %x)
   ret <2 x float> %r
@@ -204,7 +204,7 @@ define <2 x float> @test_maxnum_const_inf_nnan_comm_vec(<2 x float> %x) {
 
 define <2 x float> @test_maximum_const_inf_nnan_comm_vec(<2 x float> %x) {
 ; CHECK-LABEL: @test_maximum_const_inf_nnan_comm_vec(
-; CHECK-NEXT:    ret <2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>
+; CHECK-NEXT:    ret <2 x float> splat (float 0x7FF0000000000000)
 ;
   %r = call nnan <2 x float> @llvm.maximum.v2f32(<2 x float> <float 0x7ff0000000000000, float 0x7ff0000000000000>, <2 x float> %x)
   ret <2 x float> %r
@@ -727,7 +727,7 @@ define float @minnum_neginf(float %x) {
 
 define <2 x double> @minnum_neginf_commute_vec(<2 x double> %x) {
 ; CHECK-LABEL: @minnum_neginf_commute_vec(
-; CHECK-NEXT:    ret <2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0xFFF0000000000000)
 ;
   %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> %x)
   ret <2 x double> %r
@@ -813,7 +813,7 @@ define float @maxnum_x_y_maxnum_z(float %x, float %y, float %z) {
 
 define <2 x double> @maxnum_inf(<2 x double> %x) {
 ; CHECK-LABEL: @maxnum_inf(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF0000000000000)
 ;
   %val = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double><double 0x7FF0000000000000, double 0x7FF0000000000000>)
   ret <2 x double> %val
@@ -915,7 +915,7 @@ define <2 x double> @minimum_nan_op1_vec_partial_poison(<2 x double> %x) {
 
 define <2 x double> @minimum_nan_op1_vec(<2 x double> %x) {
 ; CHECK-LABEL: @minimum_nan_op1_vec(
-; CHECK-NEXT:    ret <2 x double> <double 0x7FF800DEAD00DEAD, double 0x7FF800DEAD00DEAD>
+; CHECK-NEXT:    ret <2 x double> splat (double 0x7FF800DEAD00DEAD)
 ;
   %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> <double 0x7ff800dead00dead, double 0x7ff800dead00dead>)
   ret <2 x double> %r
@@ -1164,7 +1164,7 @@ define float @minimum_neginf(float %x) {
 
 define <2 x double> @minimum_neginf_commute_vec(<2 x double> %x) {
 ; CHECK-LABEL: @minimum_neginf_commute_vec(
-; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> [[X:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = call <2 x double> @llvm.minimum.v2f64(<2 x double> splat (double 0xFFF0000000000000), <2 x double> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x double> [[R]]
 ;
   %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>, <2 x double> %x)
@@ -1185,7 +1185,7 @@ define float @minimum_inf(float %x) {
 
 define <2 x double> @maximum_inf(<2 x double> %x) {
 ; CHECK-LABEL: @maximum_inf(
-; CHECK-NEXT:    [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>)
+; CHECK-NEXT:    [[VAL:%.*]] = call <2 x double> @llvm.maximum.v2f64(<2 x double> [[X:%.*]], <2 x double> splat (double 0x7FF0000000000000))
 ; CHECK-NEXT:    ret <2 x double> [[VAL]]
 ;
   %val = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double><double 0x7FF0000000000000, double 0x7FF0000000000000>)
diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll
index bb557500822c14..8d2d9125404144 100644
--- a/llvm/test/Transforms/InstSimplify/fp-nan.ll
+++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll
@@ -103,7 +103,7 @@ define <vscale x 1 x double> @fmul_nan_op0_scalable_vec_1(<vscale x 1 x double>
 
 define <2 x float> @fmul_nan_op1(<2 x float> %x) {
 ; CHECK-LABEL: @fmul_nan_op1(
-; CHECK-NEXT:    ret <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>
+; CHECK-NEXT:    ret <2 x float> splat (float 0x7FF8000000000000)
 ;
   %r = fmul <2 x float> %x, <float 0x7FF8000000000000, float 0x7FF8000000000000>
   ret <2 x float> %r
@@ -121,7 +121,7 @@ define <vscale x 1 x double> @fmul_nan_op1_scalable_vec(<vscale x 1 x double> %x
 
 define <2 x double> @fdiv_nan_op0(<2 x double> %x) {
 ; CHECK-LABEL: @fdiv_nan_op0(
-; CHECK-NEXT:    ret <2 x double> <double 0xFFF800000000000F, double 0xFFF800000000000F>
+; CHECK-NEXT:    ret <2 x double> splat (double 0xFFF800000000000F)
 ;
   %r = fdiv <2 x double> <double 0xFFF800000000000F, double 0xFFF800000000000F>, %x
   ret <2 x double>  %r
diff --git a/llvm/test/Transforms/InstSimplify/fptoi-range.ll b/llvm/test/Transforms/InstSimplify/fptoi-range.ll
index 95f2a9d50793c6..79d627a8b95654 100644
--- a/llvm/test/Transforms/InstSimplify/fptoi-range.ll
+++ b/llvm/test/Transforms/InstSimplify/fptoi-range.ll
@@ -157,7 +157,7 @@ define i1 @f16_ui_min2(half %f) {
 define <2 x i1> @v2f16_si_max(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_si_max(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i32> [[I]], <i32 65504, i32 65504>
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptosi <2 x half> %f to <2 x i32>
@@ -177,7 +177,7 @@ define <2 x i1> @v2f16_si_max2(<2 x half> %f) {
 define <2 x i1> @v2f16_si16_max2(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_si16_max2(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i16> [[I]], <i16 -32, i16 -32>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptosi <2 x half> %f to <2 x i16>
@@ -197,7 +197,7 @@ define <2 x i1> @v2f16_si_min1(<2 x half> %f) {
 define <2 x i1> @v2f16_si16_min1(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_si16_min1(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i16> [[I]], <i16 32, i16 32>
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i16> [[I]], splat (i16 32)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptosi <2 x half> %f to <2 x i16>
@@ -208,7 +208,7 @@ define <2 x i1> @v2f16_si16_min1(<2 x half> %f) {
 define <2 x i1> @v2f16_si_min2(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_si_min2(
 ; CHECK-NEXT:    [[I:%.*]] = fptosi <2 x half> [[F:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[I]], <i32 -65504, i32 -65504>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[I]], splat (i32 -65504)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptosi <2 x half> %f to <2 x i32>
@@ -219,7 +219,7 @@ define <2 x i1> @v2f16_si_min2(<2 x half> %f) {
 define <2 x i1> @v2f16_ui_max1(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_ui_max1(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i32> [[I]], <i32 65504, i32 65504>
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i32> [[I]], splat (i32 65504)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptoui <2 x half> %f to <2 x i32>
@@ -239,7 +239,7 @@ define <2 x i1> @v2f16_ui_max2(<2 x half> %f) {
 define <2 x i1> @v2f16_ui16_max2(<2 x half> %f) {
 ; CHECK-LABEL: @v2f16_ui16_max2(
 ; CHECK-NEXT:    [[I:%.*]] = fptoui <2 x half> [[F:%.*]] to <2 x i16>
-; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i16> [[I]], <i16 -32, i16 -32>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i16> [[I]], splat (i16 -32)
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %i = fptoui <2 x half> %f to <2 x i16>
diff --git a/llvm/test/Transforms/InstSimplify/frexp.ll b/llvm/test/Transforms/InstSimplify/frexp.ll
index 63fe1dec692933..34cfce92bac43e 100644
--- a/llvm/test/Transforms/InstSimplify/frexp.ll
+++ b/llvm/test/Transforms/InstSimplify/frexp.ll
@@ -9,8 +9,8 @@ declare { <vscale x 2 x float>, <vscale x 2 x i32> } @llvm.frexp.nxv2f32.nxv2i32
 
 
 define { float, i32 } @frexp_frexp(float %x) {
-; CHECK-LABEL: define { float, i32 } @frexp_frexp
-; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-LABEL: define { float, i32 } @frexp_frexp(
+; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    [[FREXP0:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
 ; CHECK-NEXT:    ret { float, i32 } [[FREXP0]]
 ;
@@ -21,8 +21,8 @@ define { float, i32 } @frexp_frexp(float %x) {
 }
 
 define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) {
-; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector
-; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_frexp_vector(
+; CHECK-SAME: <2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:    [[FREXP0:%.*]] = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> [[X]])
 ; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } [[FREXP0]]
 ;
@@ -33,8 +33,8 @@ define { <2 x float>, <2 x i32> } @frexp_frexp_vector(<2 x float> %x) {
 }
 
 define { float, i32 } @frexp_frexp_const(float %x) {
-; CHECK-LABEL: define { float, i32 } @frexp_frexp_const
-; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-LABEL: define { float, i32 } @frexp_frexp_const(
+; CHECK-SAME: float [[X:%.*]]) {
 ; CHECK-NEXT:    ret { float, i32 } { float 6.562500e-01, i32 0 }
 ;
   %frexp0 = call { float, i32 } @llvm.frexp.f32.i32(float 42.0)
@@ -44,8 +44,8 @@ define { float, i32 } @frexp_frexp_const(float %x) {
 }
 
 define { <vscale x 2 x float>, <vscale x 2 x i32> } @frexp_frexp_scalable_vector(<vscale x 2 x float> %x) {
-; CHECK-LABEL: define { <vscale x 2 x float>, <vscale x 2 x i32> } @frexp_frexp_scalable_vector
-; CHECK-SAME: (<vscale x 2 x float> [[X:%.*]]) {
+; CHECK-LABEL: define { <vscale x 2 x float>, <vscale x 2 x i32> } @frexp_frexp_scalable_vector(
+; CHECK-SAME: <vscale x 2 x float> [[X:%.*]]) {
 ; CHECK-NEXT:    [[FREXP0:%.*]] = call { <vscale x 2 x float>, <vscale x 2 x i32> } @llvm.frexp.nxv2f32.nxv2i32(<vscale x 2 x float> [[X]])
 ; CHECK-NEXT:    ret { <vscale x 2 x float>, <vscale x 2 x i32> } [[FREXP0]]
 ;
@@ -252,7 +252,7 @@ define { ppc_fp128, i32} @canonicalize_noncanonical_zero_1_ppcf128() {
 
 define { <2 x float>, <2 x i32> } @frexp_splat_4() {
 ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_4() {
-; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> <float 5.000000e-01, float 5.000000e-01>, <2 x i32> <i32 3, i32 3> }
+; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 5.000000e-01), <2 x i32> splat (i32 3) }
 ;
   %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> <float 4.0, float 4.0>)
   ret { <2 x float>, <2 x i32> } %ret
@@ -260,7 +260,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_4() {
 
 define { <2 x float>, <2 x i32> } @frexp_splat_qnan() {
 ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_qnan() {
-; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x i32> zeroinitializer }
+; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF8000000000000), <2 x i32> zeroinitializer }
 ;
   %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>)
   ret { <2 x float>, <2 x i32> } %ret
@@ -268,7 +268,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_qnan() {
 
 define { <2 x float>, <2 x i32> } @frexp_splat_inf() {
 ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_inf() {
-; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>, <2 x i32> zeroinitializer }
+; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0x7FF0000000000000), <2 x i32> zeroinitializer }
 ;
   %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000>)
   ret { <2 x float>, <2 x i32> } %ret
@@ -276,7 +276,7 @@ define { <2 x float>, <2 x i32> } @frexp_splat_inf() {
 
 define { <2 x float>, <2 x i32> } @frexp_splat_neginf() {
 ; CHECK-LABEL: define { <2 x float>, <2 x i32> } @frexp_splat_neginf() {
-; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000>, <2 x i32> zeroinitializer }
+; CHECK-NEXT:    ret { <2 x float>, <2 x i32> } { <2 x float> splat (float 0xFFF0000000000000), <2 x i32> zeroinitializer }
 ;
   %ret = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000>)
   ret { <2 x float>, <2 x i32> } %ret
diff --git a/llvm/test/Transforms/InstSimplify/gep.ll b/llvm/test/Transforms/InstSimplify/gep.ll
index f682fe82ca7986..276707894146e8 100644
--- a/llvm/test/Transforms/InstSimplify/gep.ll
+++ b/llvm/test/Transforms/InstSimplify/gep.ll
@@ -206,7 +206,7 @@ define ptr @ptr_idx_scalar() {
 
 define <2 x ptr> @ptr_idx_vector() {
 ; CHECK-LABEL: @ptr_idx_vector(
-; CHECK-NEXT:    ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    ret <2 x ptr> getelementptr (i32, ptr null, <2 x i64> splat (i64 1))
 ;
   %gep = getelementptr i32, ptr null, <2 x i64> <i64 1, i64 1>
   ret <2 x ptr> %gep
@@ -224,7 +224,7 @@ define <4 x ptr> @ptr_idx_mix_scalar_vector(){
 
 define <4 x ptr> @vector_idx_scalar() {
 ; CHECK-LABEL: @vector_idx_scalar(
-; CHECK-NEXT:    ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+; CHECK-NEXT:    ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1))
 ;
   %gep = getelementptr i32, <4 x ptr> zeroinitializer, i64 1
   ret <4 x ptr> %gep
@@ -232,7 +232,7 @@ define <4 x ptr> @vector_idx_scalar() {
 
 define <4 x ptr> @vector_idx_vector() {
 ; CHECK-LABEL: @vector_idx_vector(
-; CHECK-NEXT:    ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+; CHECK-NEXT:    ret <4 x ptr> getelementptr (i32, <4 x ptr> zeroinitializer, <4 x i64> splat (i64 1))
 ;
   %gep = getelementptr i32, <4 x ptr> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 1>
   ret <4 x ptr> %gep
@@ -370,6 +370,14 @@ define <8 x ptr> @gep_vector_index_op3_poison_constant_index_afterwards(ptr %ptr
 }
 
 define i64 @gep_array_of_scalable_vectors_ptrdiff(ptr %ptr) {
+; CHECK-LABEL: @gep_array_of_scalable_vectors_ptrdiff(
+; CHECK-NEXT:    [[C1:%.*]] = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr [[PTR:%.*]], i64 4
+; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr [[PTR]], i64 6
+; CHECK-NEXT:    [[C1_INT:%.*]] = ptrtoint ptr [[C1]] to i64
+; CHECK-NEXT:    [[C2_INT:%.*]] = ptrtoint ptr [[C2]] to i64
+; CHECK-NEXT:    [[DIFF:%.*]] = sub i64 [[C2_INT]], [[C1_INT]]
+; CHECK-NEXT:    ret i64 [[DIFF]]
+;
   %c1 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 4
   %c2 = getelementptr inbounds [8 x <vscale x 4 x i32>], ptr %ptr, i64 6
   %c1.int = ptrtoint ptr %c1 to i64
diff --git a/llvm/test/Transforms/InstSimplify/icmp-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
index 4f3a5d3dba3b5c..91d85b6f34575e 100644
--- a/llvm/test/Transforms/InstSimplify/icmp-constant.ll
+++ b/llvm/test/Transforms/InstSimplify/icmp-constant.ll
@@ -1215,7 +1215,7 @@ define <2 x i1> @icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 10) %i) {
 
 define <2 x i1> @neg_icmp_eq_constant_range_attr_vec(<2 x i8> range(i8 0, 11) %i) {
 ; CHECK-LABEL: @neg_icmp_eq_constant_range_attr_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], <i8 10, i8 10>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I:%.*]], splat (i8 10)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %cmp = icmp eq <2 x i8> %i, <i8 10, i8 10>
@@ -1238,7 +1238,7 @@ define <2 x i1> @icmp_eq_constant_range_return_vec() {
 define <2 x i1> @neg_icmp_eq_constant_range_return_vec() {
 ; CHECK-LABEL: @neg_icmp_eq_constant_range_return_vec(
 ; CHECK-NEXT:    [[I:%.*]] = call <2 x i8> @returns_contain_ten_range_helper_vec()
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I]], <i8 10, i8 10>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %i = call <2 x i8> @returns_contain_ten_range_helper_vec()
@@ -1261,7 +1261,7 @@ define <2 x i1> @icmp_eq_constant_range_call_vec() {
 define <2 x i1> @neg_icmp_eq_constant_range_call_vec() {
 ; CHECK-LABEL: @neg_icmp_eq_constant_range_call_vec(
 ; CHECK-NEXT:    [[I:%.*]] = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec()
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I]], <i8 10, i8 10>
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i8> [[I]], splat (i8 10)
 ; CHECK-NEXT:    ret <2 x i1> [[CMP]]
 ;
   %i = call range(i8 0, 11) <2 x i8> @returns_i8_helper_vec()
diff --git a/llvm/test/Transforms/InstSimplify/insertelement.ll b/llvm/test/Transforms/InstSimplify/insertelement.ll
index 3fe8b8331a40c0..40c075bf1ea667 100644
--- a/llvm/test/Transforms/InstSimplify/insertelement.ll
+++ b/llvm/test/Transforms/InstSimplify/insertelement.ll
@@ -122,7 +122,7 @@ unreachable_infloop:
 
 define <4 x i32> @insert_into_splat(i32 %index) {
 ; CHECK-LABEL: @insert_into_splat(
-; CHECK-NEXT:    ret <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    ret <4 x i32> splat (i32 3)
 ;
   %I = insertelement <4 x i32> <i32 3, i32 3, i32 3, i32 3>, i32 3, i32 %index
   ret <4 x i32> %I
diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll
index 4d662c08b1a7a1..af83f00368597f 100644
--- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll
+++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll
@@ -1123,7 +1123,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) {
 define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) {
 ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail
 ; CHECK-SAME: (<4 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -1148,7 +1148,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) {
 define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) {
 ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail
 ; CHECK-SAME: (<4 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -1173,7 +1173,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) {
 define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) {
 ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail
 ; CHECK-SAME: (<4 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
 ; CHECK-NEXT:    ret i1 [[CMP]]
@@ -1198,7 +1198,7 @@ define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) {
 define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) {
 ; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail
 ; CHECK-SAME: (<4 x double> [[X:%.*]]) {
-; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[NINF_X:%.*]] = fadd <4 x double> [[X]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]])
 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000
 ; CHECK-NEXT:    ret i1 [[CMP]]
diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
index 965c333d306d14..1445402d1feb20 100644
--- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll
+++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll
@@ -192,7 +192,7 @@ define <4 x i1> @shuf_nonzero_both(<4 x i8> %xx, <4 x i8> %yy) {
 
 define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) {
 ; CHECK-LABEL: @shuf_nonzero_both_fail(
-; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[Y:%.*]] = add nuw <4 x i8> [[YY:%.*]], <i8 1, i8 1, i8 1, i8 0>
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 4, i32 7, i32 2>
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer
@@ -208,8 +208,8 @@ define <4 x i1> @shuf_nonzero_both_fail(<4 x i8> %xx, <4 x i8> %yy) {
 
 define <4 x i1> @shuf_nonzero_both_fail2(<4 x i8> %xx, <4 x i8> %yy) {
 ; CHECK-LABEL: @shuf_nonzero_both_fail2(
-; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[Y:%.*]] = add <4 x i8> [[YY:%.*]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = add nuw <4 x i8> [[XX:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[Y:%.*]] = add <4 x i8> [[YY:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i8> [[X]], <4 x i8> [[Y]], <4 x i32> <i32 0, i32 4, i32 7, i32 2>
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq <4 x i8> [[SHUF]], zeroinitializer
 ; CHECK-NEXT:    ret <4 x i1> [[R]]
@@ -390,7 +390,7 @@ define i1 @nonzero_reduce_or(<2 x i8> %xx) {
 
 define i1 @nonzero_reduce_or_fail(<2 x i8> %xx) {
 ; CHECK-LABEL: @nonzero_reduce_or_fail(
-; CHECK-NEXT:    [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], <i8 1, i8 1>
+; CHECK-NEXT:    [[X:%.*]] = add nsw <2 x i8> [[XX:%.*]], splat (i8 1)
 ; CHECK-NEXT:    [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]])
 ; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[V]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
diff --git a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
index 5c659a1e7086a6..ddd4140b3aeddf 100644
--- a/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
+++ b/llvm/test/Transforms/InstSimplify/maxmin_intrinsics.ll
@@ -99,7 +99,7 @@ define i3 @smin_poison(i3 %x) {
 
 define <2 x i8> @umax_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_undef(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> %x)
   ret <2 x i8> %r
@@ -107,7 +107,7 @@ define <2 x i8> @umax_undef(<2 x i8> %x) {
 
 define <2 x i8> @umax_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> poison, <2 x i8> %x)
   ret <2 x i8> %r
@@ -139,7 +139,7 @@ define i8 @smax_maxval(i8 %x) {
 
 define <2 x i8> @smax_maxval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_maxval_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 127, i8 127>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 127)
 ;
   %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 127, i8 127>, <2 x i8> %x)
   ret <2 x i8> %r
@@ -155,7 +155,7 @@ define i8 @smin_minval(i8 %x) {
 
 define <2 x i8> @smin_minval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_minval_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -128, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -128)
 ;
   %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> <i8 -128, i8 -128>)
   ret <2 x i8> %r
@@ -171,7 +171,7 @@ define i8 @umax_maxval(i8 %x) {
 
 define <2 x i8> @umax_maxval_commute(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_maxval_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 255, i8 255>, <2 x i8> %x)
   ret <2 x i8> %r
@@ -259,7 +259,7 @@ define <2 x i8> @umin_maxval_commute(<2 x i8> %x) {
 
 define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_maxval_partial_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 127, i8 127>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 127)
 ;
   %r = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 poison, i8 127>, <2 x i8> %x)
   ret <2 x i8> %r
@@ -267,7 +267,7 @@ define <2 x i8> @smax_maxval_partial_poison(<2 x i8> %x) {
 
 define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_minval_partial_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 -128, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -128)
 ;
   %r = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> <i8 -128, i8 poison>)
   ret <2 x i8> %r
@@ -275,7 +275,7 @@ define <2 x i8> @smin_minval_partial_poison(<2 x i8> %x) {
 
 define <2 x i8> @umax_maxval_partial_poison(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_maxval_partial_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %r = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 255, i8 poison>, <2 x i8> %x)
   ret <2 x i8> %r
@@ -709,7 +709,7 @@ define i8 @umax_umax_constants_commute2(i8 %x) {
 
 define <2 x i8> @umax_umax_constants_commute3(<2 x i8> %x) {
 ; CHECK-LABEL: @umax_umax_constants_commute3(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 -2, i8 -2>, <2 x i8> [[X:%.*]])
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> splat (i8 -2), <2 x i8> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %m = call <2 x i8> @llvm.umax.v2i8(<2 x i8> <i8 254, i8 254>, <2 x i8> %x)
@@ -739,7 +739,7 @@ define i8 @umin_umin_constants_commute1(i8 %x) {
 
 define <2 x i8> @umin_umin_constants_commute2(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_umin_constants_commute2(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 127, i8 127>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 127))
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> <i8 127, i8 127>)
@@ -769,7 +769,7 @@ define i8 @smax_smax_constants(i8 %x) {
 
 define <2 x i8> @smax_smax_constants_commute1(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_smax_constants_commute1(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 7, i8 7>, <2 x i8> [[X:%.*]])
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 7), <2 x i8> [[X:%.*]])
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 7, i8 7>, <2 x i8> %x)
@@ -799,7 +799,7 @@ define i8 @smax_smax_constants_commute3(i8 %x) {
 
 define <2 x i8> @smin_smin_constants(<2 x i8> %x) {
 ; CHECK-LABEL: @smin_smin_constants(
-; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 7, i8 7>)
+; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.smin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> splat (i8 7))
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %m = call <2 x i8> @llvm.smin.v2i8(<2 x i8> %x, <2 x i8> <i8 7, i8 7>)
@@ -842,7 +842,7 @@ define i8 @smin_smin_constants_commute3(i8 %x) {
 define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @umin_umin_constants_partial_undef(
 ; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 7, i8 undef>)
-; CHECK-NEXT:    [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> <i8 9, i8 9>, <2 x i8> [[M]])
+; CHECK-NEXT:    [[M2:%.*]] = call <2 x i8> @llvm.umin.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]])
 ; CHECK-NEXT:    ret <2 x i8> [[M2]]
 ;
   %m = call <2 x i8> @llvm.umin.v2i8(<2 x i8> %x, <2 x i8> <i8 7, i8 undef> )
@@ -855,7 +855,7 @@ define <2 x i8> @umin_umin_constants_partial_undef(<2 x i8> %x) {
 define <2 x i8> @smax_smax_constants_partial_undef(<2 x i8> %x) {
 ; CHECK-LABEL: @smax_smax_constants_partial_undef(
 ; CHECK-NEXT:    [[M:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 undef, i8 10>)
-; CHECK-NEXT:    [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> <i8 9, i8 9>, <2 x i8> [[M]])
+; CHECK-NEXT:    [[M2:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> splat (i8 9), <2 x i8> [[M]])
 ; CHECK-NEXT:    ret <2 x i8> [[M2]]
 ;
   %m = call <2 x i8> @llvm.smax.v2i8(<2 x i8> %x, <2 x i8> <i8 undef, i8 10> )
diff --git a/llvm/test/Transforms/InstSimplify/negate.ll b/llvm/test/Transforms/InstSimplify/negate.ll
index d07029becd1fe9..95d8f46ac5d249 100644
--- a/llvm/test/Transforms/InstSimplify/negate.ll
+++ b/llvm/test/Transforms/InstSimplify/negate.ll
@@ -64,7 +64,7 @@ define i8 @negate_zero_or_minsigned(i8 %x) {
 
 define <2 x i8> @negate_zero_or_minsigned_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @negate_zero_or_minsigned_vec(
-; CHECK-NEXT:    [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], <i8 -128, i8 -128>
+; CHECK-NEXT:    [[SIGNBIT:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 -128)
 ; CHECK-NEXT:    ret <2 x i8> [[SIGNBIT]]
 ;
   %signbit = and <2 x i8> %x, <i8 128, i8 128>
diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll
index f241c6987b9e70..387c2b8f92074f 100644
--- a/llvm/test/Transforms/InstSimplify/or.ll
+++ b/llvm/test/Transforms/InstSimplify/or.ll
@@ -19,7 +19,7 @@ define i32 @all_ones(i32 %A) {
 
 define <3 x i8> @all_ones_vec_with_poison_elt(<3 x i8> %A) {
 ; CHECK-LABEL: @all_ones_vec_with_poison_elt(
-; CHECK-NEXT:    ret <3 x i8> <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <3 x i8> splat (i8 -1)
 ;
   %B = or <3 x i8> %A, <i8 -1, i8 poison, i8 -1>
   ret <3 x i8> %B
@@ -130,7 +130,7 @@ define i8 @test11(i8 %A) {
 define i8 @test11v(<2 x i8> %A) {
 ; CHECK-LABEL: @test11v(
 ; CHECK-NEXT:    [[B:%.*]] = or <2 x i8> [[A:%.*]], <i8 -2, i8 0>
-; CHECK-NEXT:    [[CV:%.*]] = xor <2 x i8> [[B]], <i8 13, i8 13>
+; CHECK-NEXT:    [[CV:%.*]] = xor <2 x i8> [[B]], splat (i8 13)
 ; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x i8> [[CV]], i32 0
 ; CHECK-NEXT:    [[D:%.*]] = or i8 [[C]], 1
 ; CHECK-NEXT:    [[E:%.*]] = xor i8 [[D]], 12
diff --git a/llvm/test/Transforms/InstSimplify/pr28725.ll b/llvm/test/Transforms/InstSimplify/pr28725.ll
index 81884c9270ac2a..9de5bed6aa8cee 100644
--- a/llvm/test/Transforms/InstSimplify/pr28725.ll
+++ b/llvm/test/Transforms/InstSimplify/pr28725.ll
@@ -6,7 +6,7 @@
 define <2 x i16> @test1() {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> <i32 1> to <2 x i16>), i32 0), 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 extractelement (<2 x i16> bitcast (<1 x i32> splat (i32 1) to <2 x i16>), i32 0), 0
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], [[S:%.*]] zeroinitializer, [[S]] { i16 0, i32 1 }
 ; CHECK-NEXT:    [[E:%.*]] = extractvalue [[S]] [[SEL]], 0
 ; CHECK-NEXT:    [[B:%.*]] = insertelement <2 x i16> <i16 undef, i16 0>, i16 [[E]], i32 0
diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll
index d2c4a5dd7f0353..5e7c636d623180 100644
--- a/llvm/test/Transforms/InstSimplify/ptrmask.ll
+++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll
@@ -170,7 +170,7 @@ define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) {
 define <2 x ptr> @ptrmask_simplify_aligned_unused_vec(<2 x ptr> align 128 %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_aligned_unused_vec
 ; CHECK-SAME: (<2 x ptr> align 128 [[P:%.*]]) {
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64))
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
@@ -212,8 +212,8 @@ define ptr @ptrmask_simplify_known_unused(ptr %p) {
 define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
-; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64))
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32)
 ; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
@@ -225,8 +225,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec(<2 x ptr> %p) {
 define <2 x ptr> @ptrmask_simplify_known_unused_vec2(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec2
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
-; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64))
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32)
 ; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -64>)
@@ -239,7 +239,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec3(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec3
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
-; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32)
 ; CHECK-NEXT:    ret <2 x ptr> [[PGEP]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
@@ -266,7 +266,7 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) {
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -128>)
 ; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 16, i64 64>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32))
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -128>)
@@ -278,8 +278,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail(<2 x ptr> %p) {
 define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail2
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
-; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -64>)
-; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
+; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> splat (i64 -64))
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32)
 ; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -64>)
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
@@ -293,8 +293,8 @@ define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3(<2 x ptr> %p) {
 ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_known_unused_vec_fail3
 ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) {
 ; CHECK-NEXT:    [[PM0:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 -64, i64 -16>)
-; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> <i64 32, i64 32>
-; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> <i64 -32, i64 -32>)
+; CHECK-NEXT:    [[PGEP:%.*]] = getelementptr i8, <2 x ptr> [[PM0]], <2 x i64> splat (i64 32)
+; CHECK-NEXT:    [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PGEP]], <2 x i64> splat (i64 -32))
 ; CHECK-NEXT:    ret <2 x ptr> [[R]]
 ;
   %pm0 = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 -64, i64 -16>)
diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll
index aceb7cb12185d6..1abf0202f0b3e1 100644
--- a/llvm/test/Transforms/InstSimplify/rem.ll
+++ b/llvm/test/Transforms/InstSimplify/rem.ll
@@ -501,7 +501,7 @@ define <2 x i8> @simplfy_srem_of_mul(<2 x i8> %x) {
 define <2 x i8> @simplfy_srem_of_mul_fail_bad_mod(<2 x i8> %x) {
 ; CHECK-LABEL: @simplfy_srem_of_mul_fail_bad_mod(
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw <2 x i8> [[X:%.*]], <i8 20, i8 11>
-; CHECK-NEXT:    [[R:%.*]] = srem <2 x i8> [[MUL]], <i8 5, i8 5>
+; CHECK-NEXT:    [[R:%.*]] = srem <2 x i8> [[MUL]], splat (i8 5)
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %mul = mul nsw <2 x i8> %x, <i8 20, i8 11>
diff --git a/llvm/test/Transforms/InstSimplify/returned.ll b/llvm/test/Transforms/InstSimplify/returned.ll
index 2da1052e7d4d89..cff12c5ea406c9 100644
--- a/llvm/test/Transforms/InstSimplify/returned.ll
+++ b/llvm/test/Transforms/InstSimplify/returned.ll
@@ -38,7 +38,7 @@ define <8 x i1> @returned_vec_arg_casted(<2 x i32> %a) {
 
 define <8 x i1> @returned_vec_arg_casted2(<2 x i32> %a) {
 ; CHECK-LABEL: @returned_vec_arg_casted2(
-; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 1, i32 1>
+; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 1)
 ; CHECK-NEXT:    [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> [[OR]])
 ; CHECK-NEXT:    [[C:%.*]] = icmp ne <8 x i8> [[X]], zeroinitializer
 ; CHECK-NEXT:    ret <8 x i1> [[C]]
diff --git a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll
index ea139f2411cc3d..c3ca2c92da03f7 100644
--- a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll
+++ b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll
@@ -74,7 +74,7 @@ define i3 @uadd_scalar_maxval_commute(i3 %a) {
 
 define <2 x i8> @uadd_vector_maxval_commute(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_vector_maxval_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x4v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 255, i8 255>, <2 x i8> %a)
   ret <2 x i8> %x4v
@@ -98,7 +98,7 @@ define i8 @uadd_scalar_poison(i8 %a) {
 
 define <2 x i8> @uadd_vector_undef(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_vector_undef(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 undef, i8 undef>)
   ret <2 x i8> %x5v
@@ -106,7 +106,7 @@ define <2 x i8> @uadd_vector_undef(<2 x i8> %a) {
 
 define <2 x i8> @uadd_vector_poison(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_vector_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 poison, i8 poison>)
   ret <2 x i8> %x5v
@@ -130,7 +130,7 @@ define i8 @uadd_scalar_poison_commute(i8 %a) {
 
 define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_vector_undef_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a)
   ret <2 x i8> %x5v
@@ -138,7 +138,7 @@ define <2 x i8> @uadd_vector_undef_commute(<2 x i8> %a) {
 
 define <2 x i8> @uadd_vector_poison_commute(<2 x i8> %a) {
 ; CHECK-LABEL: @uadd_vector_poison_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %x5v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a)
   ret <2 x i8> %x5v
@@ -187,7 +187,7 @@ define i8 @sadd_scalar_maxval(i8 %a) {
 
 define <2 x i8> @sadd_vector_maxval(<2 x i8> %a) {
 ; CHECK-LABEL: @sadd_vector_maxval(
-; CHECK-NEXT:    [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 127, i8 127>)
+; CHECK-NEXT:    [[Y3V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127))
 ; CHECK-NEXT:    ret <2 x i8> [[Y3V]]
 ;
   %y3v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 127, i8 127>)
@@ -230,7 +230,7 @@ define i8 @sadd_scalar_poison(i8 %a) {
 
 define <2 x i8> @sadd_vector_undef(<2 x i8> %a) {
 ; CHECK-LABEL: @sadd_vector_undef(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> undef)
   ret <2 x i8> %y5v
@@ -238,7 +238,7 @@ define <2 x i8> @sadd_vector_undef(<2 x i8> %a) {
 
 define <2 x i8> @sadd_vector_poison(<2 x i8> %a) {
 ; CHECK-LABEL: @sadd_vector_poison(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %y5v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> poison)
   ret <2 x i8> %y5v
@@ -262,7 +262,7 @@ define i8 @sadd_scalar_poison_commute(i8 %a) {
 
 define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) {
 ; CHECK-LABEL: @sadd_vector_undef_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> %a)
   ret <2 x i8> %y6v
@@ -270,7 +270,7 @@ define <2 x i8> @sadd_vector_undef_commute(<2 x i8> %a) {
 
 define <2 x i8> @sadd_vector_poison_commute(<2 x i8> %a) {
 ; CHECK-LABEL: @sadd_vector_poison_commute(
-; CHECK-NEXT:    ret <2 x i8> <i8 -1, i8 -1>
+; CHECK-NEXT:    ret <2 x i8> splat (i8 -1)
 ;
   %y6v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> poison, <2 x i8> %a)
   ret <2 x i8> %y6v
@@ -449,7 +449,7 @@ define i8 @ssub_scalar_maxval(i8 %a) {
 
 define <2 x i8> @ssub_vector_maxval(<2 x i8> %a) {
 ; CHECK-LABEL: @ssub_vector_maxval(
-; CHECK-NEXT:    [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 127, i8 127>)
+; CHECK-NEXT:    [[Y3V:%.*]] = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> splat (i8 127))
 ; CHECK-NEXT:    ret <2 x i8> [[Y3V]]
 ;
   %y3v = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 127, i8 127>)
diff --git a/llvm/test/Transforms/InstSimplify/sdiv.ll b/llvm/test/Transforms/InstSimplify/sdiv.ll
index 99092802cab025..ec93e4d2fb6cbb 100644
--- a/llvm/test/Transforms/InstSimplify/sdiv.ll
+++ b/llvm/test/Transforms/InstSimplify/sdiv.ll
@@ -12,7 +12,7 @@ define i32 @negated_operand(i32 %x) {
 
 define <2 x i32> @negated_operand_commute_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @negated_operand_commute_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 -1)
 ;
   %negx = sub nsw <2 x i32> zeroinitializer, %x
   %div = sdiv <2 x i32> %negx, %x
@@ -31,7 +31,7 @@ define i32 @knownnegation(i32 %x, i32 %y) {
 
 define <2 x i32> @knownnegation_commute_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @knownnegation_commute_vec(
-; CHECK-NEXT:    ret <2 x i32> <i32 -1, i32 -1>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 -1)
 ;
   %xy = sub nsw <2 x i32> %x, %y
   %yx = sub nsw <2 x i32> %y, %x
@@ -160,7 +160,7 @@ define <2 x i32> @knownnegation_commute_vec_bad3(<2 x i32> %x, <2 x i32> %y) {
 
 define <3 x i32> @negated_operand_vec_poison(<3 x i32> %x) {
 ; CHECK-LABEL: @negated_operand_vec_poison(
-; CHECK-NEXT:    ret <3 x i32> <i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    ret <3 x i32> splat (i32 -1)
 ;
   %negx = sub nsw <3 x i32> <i32 0, i32 poison, i32 0>, %x
   %div = sdiv <3 x i32> %negx, %x
diff --git a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll
index 33670128af439c..6d48afc4013515 100644
--- a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll
@@ -59,7 +59,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) {
 
 define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) {
 ; CHECK-LABEL: @equal_arms_vec_poison(
-; CHECK-NEXT:    ret <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 42)
 ;
   %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 poison>, <2 x i32> <i32 poison, i32 42>
   ret <2 x i32> %V
@@ -265,7 +265,7 @@ define i32 @test11(i32 %X) {
 
 define <2 x i8> @test11vec(<2 x i8> %X) {
 ; CHECK-LABEL: @test11vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
   %cmp = icmp sgt <2 x i8> %X, <i8 -1, i8 -1>
@@ -658,8 +658,8 @@ define i1 @and_cmps(i32 %x) {
 
 define <2 x i1> @and_cmps_vector(<2 x i32> %x) {
 ; CHECK-LABEL: @and_cmps_vector(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 92, i32 92>
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92)
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll
index 4618e1143dc8f9..5f160aa1658616 100644
--- a/llvm/test/Transforms/InstSimplify/select.ll
+++ b/llvm/test/Transforms/InstSimplify/select.ll
@@ -67,7 +67,7 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) {
 
 define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) {
 ; CHECK-LABEL: @equal_arms_vec_poison(
-; CHECK-NEXT:    ret <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    ret <2 x i32> splat (i32 42)
 ;
   %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 poison>, <2 x i32> <i32 poison, i32 42>
   ret <2 x i32> %V
@@ -319,7 +319,7 @@ define i32 @test11(i32 %X) {
 
 define <2 x i8> @test11vec(<2 x i8> %X) {
 ; CHECK-LABEL: @test11vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], <i8 127, i8 127>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i8> [[X:%.*]], splat (i8 127)
 ; CHECK-NEXT:    ret <2 x i8> [[AND]]
 ;
   %cmp = icmp sgt <2 x i8> %X, <i8 -1, i8 -1>
@@ -712,8 +712,8 @@ define i1 @and_cmps(i32 %x) {
 
 define <2 x i1> @and_cmps_vector(<2 x i32> %x) {
 ; CHECK-LABEL: @and_cmps_vector(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], <i32 92, i32 92>
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], <i32 11, i32 11>
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt <2 x i32> [[X:%.*]], splat (i32 92)
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt <2 x i32> [[X]], splat (i32 11)
 ; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[CMP1]], <2 x i1> [[CMP2]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
diff --git a/llvm/test/Transforms/InstSimplify/select_or_and.ll b/llvm/test/Transforms/InstSimplify/select_or_and.ll
index 6bb7fbf5dc41d4..41b3353df57268 100644
--- a/llvm/test/Transforms/InstSimplify/select_or_and.ll
+++ b/llvm/test/Transforms/InstSimplify/select_or_and.ll
@@ -204,7 +204,7 @@ define i32 @select_icmp_and_eq_commuted(i32 %a, i32 %b) {
 ; https://alive2.llvm.org/ce/z/HfYXvx
 define <2 x i16> @select_icmp_and_eq_vec(<2 x i16> %a, <2 x i16> %b) {
 ; CHECK-LABEL: @select_icmp_and_eq_vec(
-; CHECK-NEXT:    ret <2 x i16> <i16 -1, i16 -1>
+; CHECK-NEXT:    ret <2 x i16> splat (i16 -1)
 ;
   %and = and <2 x i16> %a, %b
   %tobool = icmp eq <2 x i16> %and, <i16 -1, i16 -1>
diff --git a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
index 6bf03779379ec7..3917172e3b752b 100644
--- a/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
+++ b/llvm/test/Transforms/InstSimplify/shift-knownbits.ll
@@ -264,7 +264,7 @@ define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
 
 define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @shl_vector_for_real(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[B:%.*]], splat (i32 3)
 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]]
 ; CHECK-NEXT:    ret <2 x i32> [[SHL]]
 ;
@@ -318,7 +318,7 @@ define i32 @lshr_cttz_zero_is_undef(i32 %x) {
 define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
@@ -342,7 +342,7 @@ define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
 define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
+; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], splat (i8 3)
 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 ;
   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
diff --git a/llvm/test/Transforms/InstSimplify/shift.ll b/llvm/test/Transforms/InstSimplify/shift.ll
index a816fcbdeeee00..fc7f6653031a68 100644
--- a/llvm/test/Transforms/InstSimplify/shift.ll
+++ b/llvm/test/Transforms/InstSimplify/shift.ll
@@ -115,7 +115,7 @@ define i32 @ashr_all_ones(i32 %A) {
 
 define <3 x i8> @ashr_all_ones_vec_with_poison_elts(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @ashr_all_ones_vec_with_poison_elts(
-; CHECK-NEXT:    ret <3 x i8> <i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <3 x i8> splat (i8 -1)
 ;
   %sh = ashr <3 x i8> <i8 poison, i8 -1, i8 poison>, %y
   ret <3 x i8> %sh
@@ -225,9 +225,9 @@ define <2 x i64> @shl_or_shr2v(<2 x i32> %a, <2 x i32> %b) {
 ; CHECK-LABEL: @shl_or_shr2v(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64>
 ; CHECK-NEXT:    [[T2:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64>
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i64> [[T1]], <i64 31, i64 31>
+; CHECK-NEXT:    [[T3:%.*]] = shl nuw <2 x i64> [[T1]], splat (i64 31)
 ; CHECK-NEXT:    [[T4:%.*]] = or <2 x i64> [[T2]], [[T3]]
-; CHECK-NEXT:    [[T5:%.*]] = lshr <2 x i64> [[T4]], <i64 31, i64 31>
+; CHECK-NEXT:    [[T5:%.*]] = lshr <2 x i64> [[T4]], splat (i64 31)
 ; CHECK-NEXT:    ret <2 x i64> [[T5]]
 ;
   %t1 = zext <2 x i32> %a to <2 x i64>
diff --git a/llvm/test/Transforms/InstSimplify/shr-nop.ll b/llvm/test/Transforms/InstSimplify/shr-nop.ll
index 90d49ccb312bcb..29fe222faaae3e 100644
--- a/llvm/test/Transforms/InstSimplify/shr-nop.ll
+++ b/llvm/test/Transforms/InstSimplify/shr-nop.ll
@@ -415,7 +415,7 @@ define <2 x i4097> @ashr_zero_vec(<2 x i4097> %shiftval) {
 
 define <2 x i64> @ashr_minus1_vec(<2 x i64> %shiftval) {
 ; CHECK-LABEL: @ashr_minus1_vec(
-; CHECK-NEXT:    ret <2 x i64> <i64 -1, i64 -1>
+; CHECK-NEXT:    ret <2 x i64> splat (i64 -1)
 ;
   %shr = ashr <2 x i64> <i64 -1, i64 -1>, %shiftval
   ret <2 x i64> %shr
diff --git a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll
index 49cacdc1e8d0f6..e7b54947587cd0 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector-inseltpoison.ll
@@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) {
 
 define <4 x i32> @const_folding1(<4 x i32> %x) {
 ; CHECK-LABEL: @const_folding1(
-; CHECK-NEXT:    ret <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <4 x i32> splat (i32 5)
 ;
   %shuf = shufflevector <4 x i32> <i32 5, i32 4, i32 5, i32 4>, <4 x i32> %x, <4 x i32> zeroinitializer
   ret <4 x i32> %shuf
@@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) {
 
 define <5 x i8> @splat_inserted_constant(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_inserted_constant(
-; CHECK-NEXT:    ret <5 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    ret <5 x i8> splat (i8 42)
 ;
   %ins3 = insertelement <4 x i8> %x, i8 42, i64 3
   %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> poison, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3>
diff --git a/llvm/test/Transforms/InstSimplify/shufflevector.ll b/llvm/test/Transforms/InstSimplify/shufflevector.ll
index 20195051616050..0442bd721974b5 100644
--- a/llvm/test/Transforms/InstSimplify/shufflevector.ll
+++ b/llvm/test/Transforms/InstSimplify/shufflevector.ll
@@ -11,7 +11,7 @@ define <4 x i32> @const_folding(<4 x i32> %x) {
 
 define <4 x i32> @const_folding1(<4 x i32> %x) {
 ; CHECK-LABEL: @const_folding1(
-; CHECK-NEXT:    ret <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    ret <4 x i32> splat (i32 5)
 ;
   %shuf = shufflevector <4 x i32> <i32 5, i32 4, i32 5, i32 4>, <4 x i32> %x, <4 x i32> zeroinitializer
   ret <4 x i32> %shuf
@@ -260,7 +260,7 @@ define <2 x float> @PR32872(<2 x float> %x) {
 
 define <5 x i8> @splat_inserted_constant(<4 x i8> %x) {
 ; CHECK-LABEL: @splat_inserted_constant(
-; CHECK-NEXT:    ret <5 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42>
+; CHECK-NEXT:    ret <5 x i8> splat (i8 42)
 ;
   %ins3 = insertelement <4 x i8> %x, i8 42, i64 3
   %splat5 = shufflevector <4 x i8> %ins3, <4 x i8> undef, <5 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -296,7 +296,7 @@ define <4 x i32> @fold_identity(<4 x i32> %x) {
 
 define <4 x i32> @fold_identity2(<4 x i32> %x) {
 ; CHECK-LABEL: @fold_identity2(
-; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], splat (i32 1)
 ; CHECK-NEXT:    ret <4 x i32> [[SHL]]
 ;
   %shl = shl <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll
index 98422706f653bf..d75c00e04c4ebf 100644
--- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll
+++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll
@@ -36,7 +36,7 @@ define float @fadd_x_n0_ebmaytrap(float %a) #0 {
 
 define <2 x float> @fadd_vec_x_n0_ebmaytrap(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_x_n0_ebmaytrap(
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
@@ -54,7 +54,7 @@ define float @fadd_x_n0_ebstrict(float %a) #0 {
 
 define <2 x float> @fadd_vec_x_n0_ebstrict(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_x_n0_ebstrict(
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
@@ -72,7 +72,7 @@ define float @fadd_x_n0_neginf(float %a) #0 {
 
 define <2 x float> @fadd_vec_x_n0_neginf(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_x_n0_neginf(
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.downward", metadata !"fpexcept.ignore") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.downward", metadata !"fpexcept.ignore") #0
@@ -90,7 +90,7 @@ define float @fadd_x_n0_dynamic(float %a) #0 {
 
 define <2 x float> @fadd_vec_x_n0_dynamic(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_x_n0_dynamic(
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.dynamic", metadata !"fpexcept.ignore") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
@@ -144,7 +144,7 @@ define float @fadd_nnan_x_n0_ebstrict(float %a) #0 {
 
 define <2 x float> @fadd_vec_nnan_x_n0_ebstrict(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_nnan_x_n0_ebstrict(
-; CHECK-NEXT:    [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[A]]
 ;
   %ret = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
@@ -164,7 +164,7 @@ define float @fadd_ninf_x_n0_ebstrict(float %a) #0 {
 ; Test with a fast math flag set but that flag is not "nnan".
 define <2 x float> @fadd_vec_ninf_x_n0_ebstrict(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_ninf_x_n0_ebstrict(
-; CHECK-NEXT:    [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> splat (float -0.000000e+00), metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call ninf <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float><float -0.0, float -0.0>, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
@@ -200,7 +200,7 @@ define float @fadd_n0_x_ebmaytrap(float %a) #0 {
 ; TODO: Canonicalize the order of the arguments. Then this will fire.
 define <2 x float> @fadd_vec_n0_x_ebmaytrap(<2 x float> %a) #0 {
 ; CHECK-LABEL: @fadd_vec_n0_x_ebmaytrap(
-; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
+; CHECK-NEXT:    [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> splat (float -0.000000e+00), <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.maytrap") #[[ATTR0]]
 ; CHECK-NEXT:    ret <2 x float> [[RET]]
 ;
   %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float><float -0.0, float -0.0>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.maytrap") #0
diff --git a/llvm/test/Transforms/InstSimplify/uscmp.ll b/llvm/test/Transforms/InstSimplify/uscmp.ll
index 47720060acb528..9b4f8dd7a5a4be 100644
--- a/llvm/test/Transforms/InstSimplify/uscmp.ll
+++ b/llvm/test/Transforms/InstSimplify/uscmp.ll
@@ -83,7 +83,7 @@ define i8 @ucmp_undef() {
 
 define <4 x i8> @ucmp_lt_splat() {
 ; CHECK-LABEL: define <4 x i8> @ucmp_lt_splat() {
-; CHECK-NEXT:    ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <4 x i8> splat (i8 -1)
 ;
   %1 = call <4 x i8> @llvm.ucmp(<4 x i32> splat(i32 1), <4 x i32> splat(i32 3))
   ret <4 x i8> %1
@@ -222,7 +222,7 @@ define i8 @ucmp_with_addition2(i32 %x) {
 define <4 x i8> @ucmp_with_addition_vec(<4 x i32> %x) {
 ; CHECK-LABEL: define <4 x i8> @ucmp_with_addition_vec(
 ; CHECK-SAME: <4 x i32> [[X:%.*]]) {
-; CHECK-NEXT:    ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    ret <4 x i8> splat (i8 -1)
 ;
   %1 = add nuw <4 x i32> %x, splat(i32 1)
   %2 = call <4 x i8> @llvm.ucmp(<4 x i32> %x, <4 x i32> %1)
diff --git a/llvm/test/Transforms/InstSimplify/vector_gep.ll b/llvm/test/Transforms/InstSimplify/vector_gep.ll
index a1d0bd379aa740..c373e15f19e822 100644
--- a/llvm/test/Transforms/InstSimplify/vector_gep.ll
+++ b/llvm/test/Transforms/InstSimplify/vector_gep.ll
@@ -56,7 +56,7 @@ define <4 x ptr> @test4(<4 x ptr> %a) {
 
 define <4 x ptr> @test5() {
 ; CHECK-LABEL: define <4 x ptr> @test5() {
-; CHECK-NEXT:    ret <4 x ptr> getelementptr (i8, <4 x ptr> <ptr inttoptr (i64 1 to ptr), ptr inttoptr (i64 2 to ptr), ptr inttoptr (i64 3 to ptr), ptr inttoptr (i64 4 to ptr)>, <4 x i64> <i64 1, i64 1, i64 1, i64 1>)
+; CHECK-NEXT:    ret <4 x ptr> getelementptr (i8, <4 x ptr> <ptr inttoptr (i64 1 to ptr), ptr inttoptr (i64 2 to ptr), ptr inttoptr (i64 3 to ptr), ptr inttoptr (i64 4 to ptr)>, <4 x i64> splat (i64 1))
 ;
   %c = inttoptr <4 x i64> <i64 1, i64 2, i64 3, i64 4> to <4 x ptr>
   %gep = getelementptr i8, <4 x ptr> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
diff --git a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
index 167d282edb3e72..41dcf2a0637230 100644
--- a/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
+++ b/llvm/test/Transforms/InterleavedAccess/X86/interleave-load-extract-shuffle-changes.ll
@@ -12,7 +12,7 @@ define <2 x double> @shuffle_binop_fol(ptr %ptr) {
 ; CHECK-NEXT:  vector.body.preheader:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[EXTRACTED1:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, <4 x double> poison, <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[EXTRACTED2:%.*]] = shufflevector <4 x double> splat (double 1.000000e+00), <4 x double> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[FADD3:%.*]] = fadd <2 x double> [[EXTRACTED1]], [[EXTRACTED2]]
 ; CHECK-NEXT:    ret <2 x double> [[FADD3]]
 ;
@@ -27,7 +27,7 @@ define <2 x double> @shuffle_binop_fol_oob(ptr %ptr) {
 ; CHECK-LABEL: @shuffle_binop_fol_oob(
 ; CHECK-NEXT:  vector.body.preheader:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[FADD:%.*]] = fadd <4 x double> [[WIDE_LOAD]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[EXTRACTED:%.*]] = shufflevector <4 x double> [[FADD]], <4 x double> undef, <2 x i32> <i32 0, i32 4>
 ; CHECK-NEXT:    ret <2 x double> [[EXTRACTED]]
 ;
diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll
index f38f948c2a2e50..e6a8af60f12872 100644
--- a/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll
+++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch-opaque-ptr.ll
@@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
 ; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half>
 ; CHECK-NEXT:    store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
 ; CHECK-NEXT:    [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4
-; CHECK-NEXT:    [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], <half 0xH4000, half 0xH4000>
+; CHECK-NEXT:    [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000)
 ; CHECK-NEXT:    [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32
 ; CHECK-NEXT:    store i32 [[C_INT]], ptr [[CIDX]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
diff --git a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll
index b5316f553c21b7..56b910eebea92f 100644
--- a/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll
+++ b/llvm/test/Transforms/LoopLoadElim/type-mismatch.ll
@@ -217,7 +217,7 @@ define void @f4(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
 ; CHECK-NEXT:    [[STORE_FORWARD_CAST]] = bitcast i32 [[A_P1]] to <2 x half>
 ; CHECK-NEXT:    store i32 [[A_P1]], ptr [[AIDX_NEXT]], align 4
 ; CHECK-NEXT:    [[A:%.*]] = load <2 x half>, ptr [[AIDX]], align 4
-; CHECK-NEXT:    [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], <half 0xH4000, half 0xH4000>
+; CHECK-NEXT:    [[C:%.*]] = fmul <2 x half> [[STORE_FORWARDED]], splat (half 0xH4000)
 ; CHECK-NEXT:    [[C_INT:%.*]] = bitcast <2 x half> [[C]] to i32
 ; CHECK-NEXT:    store i32 [[C_INT]], ptr [[CIDX]], align 4
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]]
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll
index 2bb6f05b91b1ab..3673b1b4d2241a 100644
--- a/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll
+++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-upperbound.ll
@@ -28,7 +28,7 @@ define void @unroll_upper(ptr noundef %pSrc, ptr nocapture noundef writeonly %pD
 ; CHECK-NEXT:    [[NEXT_GEP37:%.*]] = getelementptr i8, ptr [[PSRC]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 [[INDEX]], i32 [[AND]])
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr [[NEXT_GEP37]], i32 2, <8 x i1> [[ACTIVE_LANE_MASK]], <8 x i16> poison)
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <8 x i16> [[WIDE_MASKED_LOAD]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
 ; CHECK-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP3]], ptr [[NEXT_GEP]], i32 1, <8 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
index d0041668e77bde..456875ecb7d1fc 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll
@@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP9:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
+; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
+; CHECK-NEXT:    [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32)
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
@@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       vector.body.epil.preheader:
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
-; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
+; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK]]
diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
index c2a8675f7ebbab..cd4198f8160f73 100644
--- a/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
+++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll
@@ -32,22 +32,22 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND12:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH_NEW]] ], [ [[VEC_IND_NEXT13_1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[NITER:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x i32> [[TMP5]], zeroinitializer
-; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[S:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT:%.*]] = add nuw nsw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP9:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND_NEXT13]]
+; CHECK-NEXT:    [[VEC_IND_NEXT13:%.*]] = add <16 x i32> [[VEC_IND12]], splat (i32 16)
+; CHECK-NEXT:    [[TMP9:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND_NEXT13]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <16 x i32> [[TMP9]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_NEXT]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT_1]] = add i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT13_1]] = add <16 x i32> [[VEC_IND12]], splat (i32 32)
 ; CHECK-NEXT:    [[NITER_NEXT_1]] = add i64 [[NITER]], 2
 ; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
 ; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT:%.*]], label [[VECTOR_BODY]]
@@ -63,10 +63,10 @@ define ptr @f(ptr returned %s, i32 zeroext %x, i32 signext %k) local_unnamed_add
 ; CHECK:       vector.body.epil.preheader:
 ; CHECK-NEXT:    br label [[VECTOR_BODY_EPIL:%.*]]
 ; CHECK:       vector.body.epil:
-; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VEC_IND12_UNR]]
+; CHECK-NEXT:    [[TMP14:%.*]] = shl <16 x i32> splat (i32 1), [[VEC_IND12_UNR]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = and <16 x i32> [[TMP14]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> <i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48, i8 48>, <16 x i8> <i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49, i8 49>
+; CHECK-NEXT:    [[TMP17:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> splat (i8 48), <16 x i8> splat (i8 49)
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[S]], i64 [[INDEX_UNR]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP17]], ptr [[TMP18]], align 1
 ; CHECK-NEXT:    br label [[MIDDLE_BLOCK]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index dcb8ba73661667..4f050877bd1316 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -21,8 +21,8 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[WIDE_LOAD]], <2 x i64> [[WIDE_LOAD1]], <2 x i32> <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> <i64 1, i64 1>, <2 x i64> [[TMP6]], <2 x i64> <i64 1, i64 1>)
-; CHECK-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> <i64 1, i64 1>, <2 x i64> [[TMP7]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
+; CHECK-NEXT:    [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 7f325ce1a1f04b..5f7920859c31e9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -89,8 +89,8 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
 ; DEFAULT-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
 ; DEFAULT-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
 ; DEFAULT-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD1]])
-; DEFAULT-NEXT:    [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
-; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], <double 1.000000e+00, double 1.000000e+00>
+; DEFAULT-NEXT:    [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP7]], splat (double 1.000000e+00)
+; DEFAULT-NEXT:    [[TMP10:%.*]] = fcmp ogt <2 x double> [[TMP8]], splat (double 1.000000e+00)
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DEFAULT:       pred.store.if:
@@ -390,7 +390,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], <i64 99, i64 99, i64 99, i64 99, i64 99, i64 99, i64 99, i64 99>
+; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 99)
 ; PRED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
 ; PRED-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -456,7 +456,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; PRED:       pred.store.continue14:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; PRED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
 ; PRED-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; PRED:       middle.block:
@@ -744,7 +744,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
 ; DEFAULT-NEXT:    [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
 ; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
 ; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; DEFAULT-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
 ; DEFAULT-NEXT:    [[TMP4:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i32 0
 ; DEFAULT-NEXT:    store <8 x double> [[TMP3]], ptr [[TMP4]], align 8
@@ -869,7 +869,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
 ; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DEFAULT:       pred.store.if:
@@ -942,7 +942,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; DEFAULT-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; DEFAULT:       pred.store.continue14:
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
 ; DEFAULT-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; DEFAULT:       middle.block:
@@ -971,7 +971,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
 ; PRED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
 ; PRED-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
+; PRED-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], splat (i64 6)
 ; PRED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
 ; PRED-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; PRED:       pred.store.if:
@@ -1044,7 +1044,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
 ; PRED-NEXT:    store i8 [[TMP33]], ptr [[TMP32]], align 1
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE14]]
 ; PRED:       pred.store.continue14:
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
 ; PRED-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; PRED:       middle.block:
@@ -1258,7 +1258,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE27]]
 ; DEFAULT:       pred.store.continue27:
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; DEFAULT-NEXT:    [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; DEFAULT-NEXT:    br i1 [[TMP80]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; DEFAULT:       middle.block:
@@ -1481,7 +1481,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 8
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]])
 ; PRED-NEXT:    [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; PRED-NEXT:    [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0
 ; PRED-NEXT:    br i1 [[TMP85]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; PRED:       middle.block:
@@ -1562,8 +1562,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; DEFAULT:       vector.body:
 ; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; DEFAULT-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 20, i64 20, i64 20, i64 20>
-; DEFAULT-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; DEFAULT-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
+; DEFAULT-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
 ; DEFAULT-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; DEFAULT-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
@@ -1594,7 +1594,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; DEFAULT-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; DEFAULT:       pred.store.continue6:
 ; DEFAULT-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; DEFAULT-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
 ; DEFAULT-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
 ; DEFAULT:       middle.block:
@@ -1625,8 +1625,8 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; PRED:       vector.body:
 ; PRED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; PRED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 20, i64 20, i64 20, i64 20>
-; PRED-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; PRED-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 20)
+; PRED-NEXT:    [[TMP1:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
 ; PRED-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
 ; PRED-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; PRED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
@@ -1657,7 +1657,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) optsize {
 ; PRED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; PRED:       pred.store.continue6:
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; PRED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
 ; PRED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; PRED:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 0f33e8fa79ce7b..2ba460255c607d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -33,14 +33,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul nuw <16 x i16> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc nuw <16 x i16> [[TMP6]] to <16 x i8>
 ; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul nuw <16 x i16> [[TMP9]], [[TMP4]]
-; CHECK-NEXT:    [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP12:%.*]] = trunc nuw <16 x i16> [[TMP11]] to <16 x i8>
 ; CHECK-NEXT:    store <16 x i8> [[TMP12]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -66,14 +66,14 @@ define void @test_pr25490(i32 %n, ptr noalias nocapture %a, ptr noalias nocaptur
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext <8 x i8> [[WIDE_LOAD8]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP17:%.*]] = zext <8 x i8> [[WIDE_LOAD7]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP18:%.*]] = mul nuw <8 x i16> [[TMP16]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP20:%.*]] = trunc nuw <8 x i16> [[TMP19]] to <8 x i8>
 ; CHECK-NEXT:    store <8 x i8> [[TMP20]], ptr [[TMP15]], align 1
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX6]]
 ; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
 ; CHECK-NEXT:    [[TMP22:%.*]] = zext <8 x i8> [[WIDE_LOAD9]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP23:%.*]] = mul nuw <8 x i16> [[TMP22]], [[TMP17]]
-; CHECK-NEXT:    [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP24:%.*]] = lshr <8 x i16> [[TMP23]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc nuw <8 x i16> [[TMP24]] to <8 x i8>
 ; CHECK-NEXT:    store <8 x i8> [[TMP25]], ptr [[TMP21]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX6]], 8
@@ -162,43 +162,43 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[A]] to i16
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <16 x i16> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr <16 x i16> [[TMP5]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP9]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i16> poison, i16 [[B]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <16 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+; CHECK-NEXT:    [[TMP5:%.*]] = trunc <16 x i16> [[TMP4]] to <16 x i8>
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <16 x i8> [[TMP5]], <16 x i8> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
-; CHECK-NEXT:    store <16 x i8> [[TMP10]], ptr [[TMP12]], align 1
-; CHECK-NEXT:    store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
-; CHECK-NEXT:    [[TMP15:%.*]] = trunc i32 [[A]] to i16
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
-; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0
-; CHECK-NEXT:    [[TMP18:%.*]] = mul <8 x i16> [[TMP16]], [[TMP17]]
-; CHECK-NEXT:    [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-; CHECK-NEXT:    [[TMP20:%.*]] = trunc <8 x i16> [[TMP19]] to <8 x i8>
-; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <8 x i8> [[TMP20]], <8 x i8> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i32 [[A]] to i16
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <8 x i16> poison, i16 [[B]], i64 0
+; CHECK-NEXT:    [[TMP14:%.*]] = mul <8 x i16> [[TMP12]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], <i16 8, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+; CHECK-NEXT:    [[TMP16:%.*]] = trunc <8 x i16> [[TMP15]] to <8 x i8>
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX7:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP22:%.*]] = sext i32 [[INDEX7]] to i64
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
-; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[INDEX7]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]]
+; CHECK-NEXT:    store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT8]] = add nuw i32 [[INDEX7]], 8
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
-; CHECK-NEXT:    br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT8]], 1000
+; CHECK-NEXT:    br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
@@ -240,43 +240,43 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[A]] to i16
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <16 x i16> [[TMP1]],  <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[TMP3]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[C]], <16 x i16> [[TMP5]], <16 x i16> [[TMP4]]
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc <16 x i16> [[TMP7]] to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <16 x i16> [[TMP1]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <16 x i16> [[TMP3]], splat (i16 8)
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[C]], <16 x i16> [[TMP4]], <16 x i16> [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <16 x i16> [[TMP5]] to <16 x i8>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[INDEX]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 16
-; CHECK-NEXT:    store <16 x i8> [[TMP9]], ptr [[TMP12]], align 1
-; CHECK-NEXT:    store <16 x i8> [[TMP10]], ptr [[TMP13]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[INDEX]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
+; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
+; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
-; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
 ; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
-; CHECK-NEXT:    [[TMP15:%.*]] = trunc i32 [[A]] to i16
-; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
-; CHECK-NEXT:    [[TMP17:%.*]] = mul <8 x i16> [[TMP16]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
-; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <8 x i16> [[TMP17]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = lshr <8 x i16> [[TMP18]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
-; CHECK-NEXT:    [[TMP20:%.*]] = select i1 [[C]], <8 x i16> [[TMP19]], <8 x i16> [[TMP18]]
-; CHECK-NEXT:    [[TMP21:%.*]] = trunc <8 x i16> [[TMP20]] to <8 x i8>
+; CHECK-NEXT:    [[TMP11:%.*]] = trunc i32 [[A]] to i16
+; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <8 x i16> poison, i16 [[TMP11]], i64 0
+; CHECK-NEXT:    [[TMP13:%.*]] = mul <8 x i16> [[TMP12]], <i16 99, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <8 x i16> [[TMP13]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], splat (i16 8)
+; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[C]], <8 x i16> [[TMP15]], <8 x i16> [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = trunc <8 x i16> [[TMP16]] to <8 x i8>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
 ; CHECK-NEXT:    [[INDEX3:%.*]] = phi i32 [ 992, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP22:%.*]] = sext i32 [[INDEX3]] to i64
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP22]]
-; CHECK-NEXT:    store <8 x i8> [[TMP21]], ptr [[TMP23]], align 1
+; CHECK-NEXT:    [[TMP18:%.*]] = sext i32 [[INDEX3]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP18]]
+; CHECK-NEXT:    store <8 x i8> [[TMP17]], ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT4]] = add nuw i32 [[INDEX3]], 8
-; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
-; CHECK-NEXT:    br i1 [[TMP24]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 1000
+; CHECK-NEXT:    br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
@@ -464,8 +464,8 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP1]], align 4
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP1]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 7749bb9edb124e..7c6a881ead1087 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -124,7 +124,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
@@ -132,7 +132,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
@@ -159,7 +159,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[VEC_IND6]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i64> [[VEC_IND6]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
@@ -213,7 +213,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP1]]
@@ -222,7 +222,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK-NEXT:    store <2 x i64> [[VEC_IND]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
@@ -252,7 +252,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[VEC_IND11]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <2 x i64> [[VEC_IND11]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
@@ -301,17 +301,17 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], <i64 10, i64 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 10, i64 10>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
+; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2
 ; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[IND_END4]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
@@ -335,11 +335,11 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX7]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], <i64 10, i64 10>
+; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[VEC_IND8]], splat (i64 10)
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP9]], ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT9]] = add <2 x i64> [[VEC_IND8]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[IND_END]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
@@ -393,7 +393,7 @@ define void @test_widen_extended_induction(ptr %dst) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i64
@@ -403,7 +403,7 @@ define void @test_widen_extended_induction(ptr %dst) {
 ; CHECK-NEXT:    store <2 x i8> [[VEC_IND]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    store <2 x i8> [[STEP_ADD]], ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
@@ -427,7 +427,7 @@ define void @test_widen_extended_induction(ptr %dst) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <2 x i8> [[VEC_IND3]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i32 [[INDEX2]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], <i8 2, i8 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 10000
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
@@ -475,7 +475,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ <i8 0, i8 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
@@ -483,7 +483,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK-NEXT:    store <2 x i8> [[VEC_IND]], ptr [[TMP2]], align 1
 ; CHECK-NEXT:    store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       middle.block:
@@ -506,7 +506,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
 ; CHECK-NEXT:    store <2 x i8> [[VEC_IND3]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT5]] = add nuw i64 [[INDEX2]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], <i8 2, i8 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i8> [[VEC_IND3]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 10000
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
 ; CHECK:       vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
index e9c9288e734394..c37f29637e37d7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll
@@ -16,7 +16,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -66,7 +66,7 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002)
 ; CHECK-NEXT:    [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index eafe6921b37ca7..c3e8111d33ab6c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -14,8 +14,8 @@ define double @test_reduction_costs() {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi double [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> <double 3.000000e+00, double 3.000000e+00>)
-; CHECK-NEXT:    [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> <double 9.000000e+00, double 9.000000e+00>)
+; CHECK-NEXT:    [[TMP0]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI]], <2 x double> splat (double 3.000000e+00))
+; CHECK-NEXT:    [[TMP1]] = call double @llvm.vector.reduce.fadd.v2f64(double [[VEC_PHI1]], <2 x double> splat (double 9.000000e+00))
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -274,8 +274,8 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4
 ; CHECK-NEXT:    [[TMP21:%.*]] = select <2 x i1> [[TMP19]], <2 x i1> [[BROADCAST_SPLAT57]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP22:%.*]] = or <2 x i1> [[TMP47]], [[TMP20]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = or <2 x i1> [[TMP47]], [[TMP21]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 1>
-; CHECK-NEXT:    [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> <i64 1, i64 1>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP20]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1)
+; CHECK-NEXT:    [[PREDPHI58:%.*]] = select <2 x i1> [[TMP21]], <2 x i64> zeroinitializer, <2 x i64> splat (i64 1)
 ; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x i1> [[TMP22]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF59:.*]], label %[[PRED_STORE_CONTINUE60:.*]]
 ; CHECK:       [[PRED_STORE_IF59]]:
@@ -313,29 +313,29 @@ define void @test_exit_branch_cost(ptr %dst, i64 %x, i32 %y, ptr %dst.1, i1 %c.4
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP38]], label %[[PRED_STORE_IF67:.*]], label %[[PRED_STORE_CONTINUE68:.*]]
 ; CHECK:       [[PRED_STORE_IF67]]:
-; CHECK-NEXT:    [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]]
-; CHECK-NEXT:    store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]]
+; CHECK-NEXT:    [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]]
+; CHECK-NEXT:    store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE68]]
 ; CHECK:       [[PRED_STORE_CONTINUE68]]:
 ; CHECK-NEXT:    [[TMP40:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF69:.*]], label %[[PRED_STORE_CONTINUE70:.*]]
 ; CHECK:       [[PRED_STORE_IF69]]:
-; CHECK-NEXT:    [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT:    store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
+; CHECK-NEXT:    [[TMP39:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
+; CHECK-NEXT:    store i64 [[TMP39]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE70]]
 ; CHECK:       [[PRED_STORE_CONTINUE70]]:
 ; CHECK-NEXT:    [[TMP42:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP42]], label %[[PRED_STORE_IF71:.*]], label %[[PRED_STORE_CONTINUE72:.*]]
 ; CHECK:       [[PRED_STORE_IF71]]:
-; CHECK-NEXT:    [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT:    store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
+; CHECK-NEXT:    [[TMP41:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
+; CHECK-NEXT:    store i64 [[TMP41]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE72]]
 ; CHECK:       [[PRED_STORE_CONTINUE72]]:
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1
 ; CHECK-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF73:.*]], label %[[PRED_STORE_CONTINUE74]]
 ; CHECK:       [[PRED_STORE_IF73]]:
-; CHECK-NEXT:    [[TMP45:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
-; CHECK-NEXT:    store i64 [[TMP45]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
+; CHECK-NEXT:    [[TMP43:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]]
+; CHECK-NEXT:    store i64 [[TMP43]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE74]]
 ; CHECK:       [[PRED_STORE_CONTINUE74]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
index 553989544c7787..b319e810495a5c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll
@@ -356,7 +356,7 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]])
 ; PRED-NEXT:    [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true>
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; PRED-NEXT:    [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0
 ; PRED-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; PRED:       middle.block:
@@ -550,7 +550,7 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
 ; PRED-NEXT:    [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; PRED-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0
 ; PRED-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; PRED:       middle.block:
@@ -747,7 +747,7 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
 ; PRED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; PRED-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
 ; PRED-NEXT:    [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], <i1 true, i1 true, i1 true, i1 true>
-; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; PRED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; PRED-NEXT:    [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0
 ; PRED-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; PRED:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 8080c3a9ba0a7d..5f4fc4af89c60d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -30,8 +30,8 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4
 ; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP8]], align 8
 ; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8
-; CHECK-NEXT:    [[TMP10]] = add <4 x i32> [[VEC_PHI]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[VEC_PHI1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[VEC_PHI1]], splat (i32 -1)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -227,7 +227,7 @@ define void @wide_truncated_iv(ptr %dst) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
@@ -235,7 +235,7 @@ define void @wide_truncated_iv(ptr %dst) {
 ; CHECK-NEXT:    store <8 x i8> [[VEC_IND]], ptr [[TMP4]], align 1
 ; CHECK-NEXT:    store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], splat (i8 8)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 192
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -419,6 +419,71 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
 ; PRED-NEXT:    [[P_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
 ; PRED-NEXT:    ret i64 [[P_LCSSA]]
 ;
+; CHECK-LABEL: define i64 @test_ptr_ivs_and_widened_ivs(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
+; CHECK-NEXT:    [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64>
+; CHECK-NEXT:    [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64>
+; CHECK-NEXT:    [[TMP10]] = shl <4 x i64> [[TMP8]], [[TMP9]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[L]], 1
+; CHECK-NEXT:    [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64
+; CHECK-NEXT:    [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64
+; CHECK-NEXT:    [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4
+; CHECK-NEXT:    [[IV_1_NEXT]] = add i32 [[IV_1]], 1
+; CHECK-NEXT:    [[IV_2_NEXT]] = add i32 [[IV_2]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P_LCSSA:%.*]] = phi i64 [ [[P]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[P_LCSSA]]
+;
 entry:
   br label %loop
 
@@ -510,6 +575,57 @@ define void @zext_iv_increment(ptr %dst, i64 %N) {
 ; PRED:       exit:
 ; PRED-NEXT:    ret void
 ;
+; CHECK-LABEL: define void @zext_iv_increment(
+; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+; CHECK:       vector.scevcheck:
+; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
+; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 1, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
+; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
+; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT_EXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[IV_WIDE]], i32 2
+; CHECK-NEXT:    store i32 0, ptr [[PATCH_INDEX]], align 8
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[IV_NEXT_EXT]] = zext i32 [[IV_NEXT]] to i64
+; CHECK-NEXT:    [[EC:%.*]] = icmp ult i64 [[IV_NEXT_EXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -538,4 +654,8 @@ exit:
 ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index 61bd8c51e16052..106d935a1dfeba 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -27,7 +27,7 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
@@ -47,21 +47,21 @@ define void @add_a(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX5]], 0
+; CHECK-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD6]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = add <8 x i8> [[WIDE_LOAD5]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
 ; CHECK-NEXT:    store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
+; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; CHECK-NEXT:    br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
+; CHECK-NEXT:    br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -130,7 +130,7 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
-; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw <16 x i8> [[WIDE_LOAD]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
@@ -150,21 +150,21 @@ define void @add_a1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX5]], 0
+; CHECK-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD6]], <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1
+; CHECK-NEXT:    [[TMP11:%.*]] = add nuw nsw <8 x i8> [[WIDE_LOAD5]], splat (i8 2)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
 ; CHECK-NEXT:    store <8 x i8> [[TMP11]], ptr [[TMP13]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8
-; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
+; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; CHECK-NEXT:    br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
+; CHECK-NEXT:    br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -224,7 +224,7 @@ define void @add_b(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i16> [[WIDE_LOAD]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <8 x i16> [[TMP4]], ptr [[TMP6]], align 2
@@ -301,7 +301,7 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i16> [[TMP4]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add <16 x i16> [[TMP4]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
@@ -321,22 +321,22 @@ define void @add_c(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]]
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX5]], 0
+; CHECK-NEXT:    [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX4]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1
-; CHECK-NEXT:    [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD6]] to <8 x i16>
-; CHECK-NEXT:    [[TMP13:%.*]] = add <8 x i16> [[TMP12]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i8>, ptr [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP12:%.*]] = zext <8 x i8> [[WIDE_LOAD5]] to <8 x i16>
+; CHECK-NEXT:    [[TMP13:%.*]] = add <8 x i16> [[TMP12]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP14]], i32 0
 ; CHECK-NEXT:    store <8 x i16> [[TMP13]], ptr [[TMP15]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 8
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
+; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
-; CHECK-NEXT:    br i1 [[CMP_N4]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N7:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]]
+; CHECK-NEXT:    br i1 [[CMP_N7]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -401,7 +401,7 @@ define void @add_d(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD]] to <8 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[TMP4]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP7]], align 4
@@ -483,12 +483,12 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-; CHECK-NEXT:    [[TMP7:%.*]] = add <16 x i8> [[TMP6]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
-; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
-; CHECK-NEXT:    [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
+; CHECK-NEXT:    [[TMP6:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 4)
+; CHECK-NEXT:    [[TMP7:%.*]] = add <16 x i8> [[TMP6]], splat (i8 32)
+; CHECK-NEXT:    [[TMP8:%.*]] = or <16 x i8> [[WIDE_LOAD]], splat (i8 51)
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <16 x i8> [[TMP8]], splat (i8 60)
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <16 x i8> [[TMP7]], [[TMP1]]
-; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i8> [[TMP9]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i8> [[TMP9]], splat (i8 -4)
 ; CHECK-NEXT:    [[TMP12:%.*]] = xor <16 x i8> [[TMP11]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul <16 x i8> [[TMP12]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
@@ -508,36 +508,36 @@ define void @add_e(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8
 ; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8>
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8>
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8>
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX11]], 0
+; CHECK-NEXT:    [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX10]], 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[TMP19]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
-; CHECK-NEXT:    [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD12]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-; CHECK-NEXT:    [[TMP23:%.*]] = add <8 x i8> [[TMP22]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
-; CHECK-NEXT:    [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD12]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
-; CHECK-NEXT:    [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <8 x i8>, ptr [[TMP21]], align 1
+; CHECK-NEXT:    [[TMP22:%.*]] = shl <8 x i8> [[WIDE_LOAD11]], splat (i8 4)
+; CHECK-NEXT:    [[TMP23:%.*]] = add <8 x i8> [[TMP22]], splat (i8 32)
+; CHECK-NEXT:    [[TMP24:%.*]] = or <8 x i8> [[WIDE_LOAD11]], splat (i8 51)
+; CHECK-NEXT:    [[TMP25:%.*]] = mul <8 x i8> [[TMP24]], splat (i8 60)
 ; CHECK-NEXT:    [[TMP26:%.*]] = and <8 x i8> [[TMP23]], [[TMP17]]
-; CHECK-NEXT:    [[TMP27:%.*]] = and <8 x i8> [[TMP25]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[TMP27:%.*]] = and <8 x i8> [[TMP25]], splat (i8 -4)
 ; CHECK-NEXT:    [[TMP28:%.*]] = xor <8 x i8> [[TMP27]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP19]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP30]], i32 0
 ; CHECK-NEXT:    store <8 x i8> [[TMP29]], ptr [[TMP31]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]]
+; CHECK-NEXT:    [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8
+; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
 ; CHECK-NEXT:    br i1 [[TMP32]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
-; CHECK-NEXT:    br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -632,13 +632,13 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[TMP5]], align 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <16 x i16> [[WIDE_LOAD]] to <16 x i8>
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-; CHECK-NEXT:    [[TMP8:%.*]] = add <16 x i8> [[TMP7]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
-; CHECK-NEXT:    [[TMP9:%.*]] = and <16 x i8> [[TMP6]], <i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52>
-; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i8> [[TMP9]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
-; CHECK-NEXT:    [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 4)
+; CHECK-NEXT:    [[TMP8:%.*]] = add <16 x i8> [[TMP7]], splat (i8 32)
+; CHECK-NEXT:    [[TMP9:%.*]] = and <16 x i8> [[TMP6]], splat (i8 -52)
+; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i8> [[TMP9]], splat (i8 51)
+; CHECK-NEXT:    [[TMP11:%.*]] = mul <16 x i8> [[TMP10]], splat (i8 60)
 ; CHECK-NEXT:    [[TMP12:%.*]] = and <16 x i8> [[TMP8]], [[TMP1]]
-; CHECK-NEXT:    [[TMP13:%.*]] = and <16 x i8> [[TMP11]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[TMP13:%.*]] = and <16 x i8> [[TMP11]], splat (i8 -4)
 ; CHECK-NEXT:    [[TMP14:%.*]] = xor <16 x i8> [[TMP13]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = mul <16 x i8> [[TMP14]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
@@ -658,38 +658,38 @@ define void @add_f(ptr noalias nocapture readonly %p, ptr noalias nocapture %q,
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF4:%.*]] = urem i64 [[TMP0]], 8
 ; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF4]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT7]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT8]] to <8 x i8>
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT9]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT10]] to <8 x i8>
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV13]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT6]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP19:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT7]] to <8 x i8>
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x i32> poison, i32 [[CONV11]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT8]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP20:%.*]] = trunc <8 x i32> [[BROADCAST_SPLAT9]] to <8 x i8>
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX11:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX11]], 0
+; CHECK-NEXT:    [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX10]], 0
 ; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i16, ptr [[TMP22]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2
-; CHECK-NEXT:    [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD12]] to <8 x i8>
-; CHECK-NEXT:    [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
-; CHECK-NEXT:    [[TMP26:%.*]] = add <8 x i8> [[TMP25]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
-; CHECK-NEXT:    [[TMP27:%.*]] = and <8 x i8> [[TMP24]], <i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52, i8 -52>
-; CHECK-NEXT:    [[TMP28:%.*]] = or <8 x i8> [[TMP27]], <i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51, i8 51>
-; CHECK-NEXT:    [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], <i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60, i8 60>
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP23]], align 2
+; CHECK-NEXT:    [[TMP24:%.*]] = trunc <8 x i16> [[WIDE_LOAD11]] to <8 x i8>
+; CHECK-NEXT:    [[TMP25:%.*]] = shl <8 x i8> [[TMP24]], splat (i8 4)
+; CHECK-NEXT:    [[TMP26:%.*]] = add <8 x i8> [[TMP25]], splat (i8 32)
+; CHECK-NEXT:    [[TMP27:%.*]] = and <8 x i8> [[TMP24]], splat (i8 -52)
+; CHECK-NEXT:    [[TMP28:%.*]] = or <8 x i8> [[TMP27]], splat (i8 51)
+; CHECK-NEXT:    [[TMP29:%.*]] = mul <8 x i8> [[TMP28]], splat (i8 60)
 ; CHECK-NEXT:    [[TMP30:%.*]] = and <8 x i8> [[TMP26]], [[TMP19]]
-; CHECK-NEXT:    [[TMP31:%.*]] = and <8 x i8> [[TMP29]], <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
+; CHECK-NEXT:    [[TMP31:%.*]] = and <8 x i8> [[TMP29]], splat (i8 -4)
 ; CHECK-NEXT:    [[TMP32:%.*]] = xor <8 x i8> [[TMP31]], [[TMP20]]
 ; CHECK-NEXT:    [[TMP33:%.*]] = mul <8 x i8> [[TMP32]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP21]]
 ; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP34]], i32 0
 ; CHECK-NEXT:    store <8 x i8> [[TMP33]], ptr [[TMP35]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT13]] = add nuw i64 [[INDEX11]], 8
-; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC5]]
+; CHECK-NEXT:    [[INDEX_NEXT12]] = add nuw i64 [[INDEX10]], 8
+; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC5]]
 ; CHECK-NEXT:    br i1 [[TMP36]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
-; CHECK-NEXT:    br i1 [[CMP_N6]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N13:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[CMP_N13]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
@@ -776,14 +776,14 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw <16 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP7:%.*]] = trunc <16 x i32> [[TMP6]] to <16 x i8>
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
-; CHECK-NEXT:    store <16 x i8> [[TMP7]], ptr [[TMP9]], align 1
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <16 x i32> [[TMP4]], splat (i32 2)
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <16 x i32> [[TMP5]] to <16 x i8>
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
+; CHECK-NEXT:    store <16 x i8> [[TMP6]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
@@ -798,10 +798,10 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[A_PHI:%.*]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[CONV]] = zext i8 [[TMP11]] to i32
+; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV]] = zext i8 [[TMP10]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
 ; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
@@ -859,14 +859,14 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP6]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc <16 x i32> [[TMP8]] to <16 x i8>
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
-; CHECK-NEXT:    store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw <16 x i32> [[TMP6]], splat (i32 2)
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8>
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
+; CHECK-NEXT:    store <16 x i8> [[TMP8]], ptr [[TMP10]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
@@ -877,15 +877,15 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[A_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[A_PHI_LCSSA:%.*]] = phi i32 [ [[A_PHI:%.*]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[A_PHI_LCSSA]] to i8
 ; CHECK-NEXT:    ret i8 [[RET]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[A_PHI]] = phi i32 [ [[CONV:%.*]], [[FOR_BODY]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT:    [[CONV]] = zext i8 [[TMP13]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[CONV]] = zext i8 [[TMP12]] to i32
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[CONV]], 2
 ; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[ADD]] to i8
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[Q]], i64 [[INDVARS_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
index a8caae40aed161..4c31cfc14afb30 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/streaming-compatible-sve-no-maximize-bandwidth.ll
@@ -47,7 +47,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran
 ; SC_SVE-NEXT:    [[TMP16:%.*]] = shl <4 x i32> [[TMP15]], [[BROADCAST_SPLAT]]
 ; SC_SVE-NEXT:    [[TMP17]] = add <4 x i32> [[TMP16]], [[VEC_PHI]]
 ; SC_SVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; SC_SVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; SC_SVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; SC_SVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SC_SVE-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; SC_SVE:       middle.block:
@@ -122,7 +122,7 @@ define i32 @foo(i32 noundef %n, i32 noundef %lag, i32 noundef %shift) vscale_ran
 ; NO_SC_SVE-NEXT:    [[TMP16:%.*]] = shl <8 x i32> [[TMP15]], [[BROADCAST_SPLAT]]
 ; NO_SC_SVE-NEXT:    [[TMP17]] = add <8 x i32> [[TMP16]], [[VEC_PHI]]
 ; NO_SC_SVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; NO_SC_SVE-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; NO_SC_SVE-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; NO_SC_SVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; NO_SC_SVE-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; NO_SC_SVE:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
index 87326d3a4a7926..c38c4b08cb5b57 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll
@@ -138,7 +138,7 @@ define void @main_vf_vscale_x_16(ptr %A) #0 {
 ; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
 ; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]]
 ; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
-; CHECK-VF8-NEXT:    store <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr [[TMP21]], align 1
+; CHECK-VF8-NEXT:    store <8 x i8> splat (i8 1), ptr [[TMP21]], align 1
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
 ; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
 ; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -232,7 +232,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
-; CHECK-NEXT:    store <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, ptr [[TMP21]], align 1
+; CHECK-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
 ; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
 ; CHECK-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -295,7 +295,7 @@ define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
 ; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
 ; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
 ; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
-; CHECK-VF8-NEXT:    store <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, ptr [[TMP21]], align 1
+; CHECK-VF8-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
 ; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
 ; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
 ; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
index f53fd5bc8d15a0..c017c1f3b36ee8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
@@ -20,7 +20,7 @@ define void @test0(ptr noalias %M3, ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[WIDE_LOAD]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
index 57807604b37a87..1dd49ecf85b81b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll
@@ -49,8 +49,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
 ; CHECK-NEXT:    [[REVERSE5:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]], <4 x double> poison)
-; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], splat (double 1.000000e+00)
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP10]], ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]])
 ; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP11]], ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE5]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
index c0933a9445f94d..d129751397a6bf 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll
@@ -20,7 +20,7 @@ define void @test_stride1_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
@@ -84,7 +84,7 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst,
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[REVERSE]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
@@ -150,7 +150,7 @@ define void @test_stride2_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP4]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[STRIDED_VEC]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[STRIDED_VEC]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
@@ -209,16 +209,16 @@ define void @test_stride3_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 3)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -273,16 +273,16 @@ define void @test_stride4_4i32(ptr readonly %data, ptr noalias nocapture %dst, i
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i32> [[TMP1]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP2]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -343,7 +343,7 @@ define void @test_stride_loopinvar_4i32(ptr readonly %data, ptr noalias nocaptur
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_LOAD]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_LOAD]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP4]], ptr [[TMP6]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
@@ -405,16 +405,16 @@ define void @test_stride_noninvar_4i32(ptr readonly %data, ptr noalias nocapture
 ; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 3, i32 11, i32 19, i32 27>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw <4 x i32> [[TMP2]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP3]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP1]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 32, i32 32, i32 32, i32 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 32)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -515,7 +515,7 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> <i32 3, i32 3, i32 3, i32 3>, [[TMP1]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> splat (i32 3), [[TMP1]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[X]], 4
 ; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -526,15 +526,15 @@ define void @test_stride_noninvar3_4i32(ptr readonly %data, ptr noalias nocaptur
 ; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[VEC_IND]], [[VEC_IND4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP4]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], <4 x i32> [[TMP5]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP6]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[WIDE_MASKED_GATHER]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> splat (i32 5), [[WIDE_MASKED_GATHER]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND4]], [[DOTSPLAT3]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
index fec5921720fed9..fde594d0483613 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-qabs.ll
@@ -29,9 +29,9 @@ define void @arm_abs_q7(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %
 ; CHECK-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 -128)
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub <16 x i8> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> splat (i8 127), <16 x i8> [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[WIDE_LOAD]], <16 x i8> [[TMP4]]
 ; CHECK-NEXT:    store <16 x i8> [[TMP5]], ptr [[NEXT_GEP7]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
@@ -124,9 +124,9 @@ define void @arm_abs_q15(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32
 ; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <8 x i16> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <8 x i16> [[WIDE_LOAD]], splat (i16 -32768)
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub <8 x i16> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[TMP4]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[WIDE_LOAD]], <8 x i16> [[TMP6]]
 ; CHECK-NEXT:    store <8 x i16> [[TMP7]], ptr [[NEXT_GEP8]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
@@ -219,9 +219,9 @@ define void @arm_abs_q31(ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32
 ; CHECK-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PDST]], i32 [[OFFSET_IDX7]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 -2147483648)
 ; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <4 x i32> zeroinitializer, [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP6]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[NEXT_GEP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
index 1371d80b00cadd..6011ccfd156148 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-predselect.ll
@@ -72,7 +72,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_MASKED_LOAD1]])
 ; CHECK-NEXT:    [[TMP8]] = add i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -115,7 +115,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
@@ -167,14 +167,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX]], i32 257)
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP0]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_LOAD1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i32> [[TMP2]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
@@ -427,7 +427,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
@@ -480,7 +480,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
index b5ae9700217eae..832d4db53036fb 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reduction-types.ll
@@ -251,7 +251,7 @@ define i32 @mul_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
@@ -318,7 +318,7 @@ define i32 @and_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
@@ -586,7 +586,7 @@ define float @fmul_f32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 [[N]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X:%.*]], i32 [[TMP0]]
@@ -653,7 +653,7 @@ define i32 @smin_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -722,7 +722,7 @@ define i32 @smax_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -791,7 +791,7 @@ define i32 @umin_i32(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
index 3432773b4e1b3d..1015ba7bdf7a8b 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll
@@ -890,7 +890,7 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP0]], i32 2
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER7:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP2]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope [[META28]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER8:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> [[TMP3]], i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> poison), !alias.scope [[META28]]
-; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], <i8 10, i8 10, i8 10, i8 10>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], splat (i8 10)
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER7]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER8]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, <4 x ptr> [[TMP1]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
index 7db5bccd896b27..6f7b45beeb6e7a 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-fold-multiple-icmps.ll
@@ -16,8 +16,8 @@ define arm_aapcs_vfpcc i32 @minmaxval4(ptr nocapture readonly %x, ptr nocapture
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[WIDE_LOAD]], <4 x i32> [[VEC_PHI1]])
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
index ffeca069dc0ea4..787f3c70be2521 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll
@@ -25,7 +25,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16>
-; CHECK-NEXT:    [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP9:%.*]] = shl <4 x i16> [[TMP8]], splat (i16 1)
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[D:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    store <4 x i16> [[TMP9]], ptr [[TMP11]], align 2
@@ -54,7 +54,7 @@ define void @trunc_not_allowed_different_vec_elemns(ptr noalias nocapture %A, pt
 ; CHECK-NEXT:    store i16 [[CONV7]], ptr [[ARRAYIDX8]], align 2
 ; CHECK-NEXT:    [[ADD9]] = add nuw nsw i32 [[I_021]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[ADD9]], 431
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ;
 entry:
   br label %for.body
@@ -451,7 +451,7 @@ define void @fptrunc_not_allowed(ptr noalias nocapture %A, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = fptrunc <4 x float> [[TMP5]] to <4 x half>
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x half> [[TMP8]], splat (half 0xH4000)
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds half, ptr [[D:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    store <4 x half> [[TMP9]], ptr [[TMP11]], align 2
@@ -542,7 +542,7 @@ define dso_local void @select_not_allowed(ptr noalias nocapture %A, ptr noalias
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -611,7 +611,7 @@ define i32 @i32_smin_reduction(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 2147483647), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -680,7 +680,7 @@ define i32 @i32_smax_reduction(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -749,7 +749,7 @@ define i32 @i32_umin_reduction(ptr nocapture readonly %x, i32 %n) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
index 4888f3d6383c85..55fd3530806d25 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll
@@ -33,13 +33,13 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
 ; CHECK-NEXT:    [[VEC_PHI15:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP48:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI16:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP49:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI17:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP50:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <2 x i64> [[STEP_ADD4]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD6:%.*]] = add <2 x i64> [[STEP_ADD5]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD7:%.*]] = add <2 x i64> [[STEP_ADD6]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD8:%.*]] = add <2 x i64> [[STEP_ADD7]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD9:%.*]] = add <2 x i64> [[STEP_ADD8]], <i64 2, i64 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_4:%.*]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_5:%.*]] = add <2 x i64> [[STEP_ADD_4]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_6:%.*]] = add <2 x i64> [[STEP_ADD_5]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_7:%.*]] = add <2 x i64> [[STEP_ADD_6]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
@@ -66,14 +66,14 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = zext <2 x i8> [[WIDE_LOAD29]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP25:%.*]] = zext <2 x i8> [[WIDE_LOAD30]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <2 x i8> [[WIDE_LOAD31]] to <2 x i64>
-; CHECK-NEXT:    [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD4]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD5]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD6]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD7]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD8]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD9]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP27:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT:    [[TMP28:%.*]] = shl <2 x i64> [[STEP_ADD]], splat (i64 1)
+; CHECK-NEXT:    [[TMP29:%.*]] = shl <2 x i64> [[STEP_ADD_2]], splat (i64 1)
+; CHECK-NEXT:    [[TMP30:%.*]] = shl <2 x i64> [[STEP_ADD_3]], splat (i64 1)
+; CHECK-NEXT:    [[TMP31:%.*]] = shl <2 x i64> [[STEP_ADD_4]], splat (i64 1)
+; CHECK-NEXT:    [[TMP32:%.*]] = shl <2 x i64> [[STEP_ADD_5]], splat (i64 1)
+; CHECK-NEXT:    [[TMP33:%.*]] = shl <2 x i64> [[STEP_ADD_6]], splat (i64 1)
+; CHECK-NEXT:    [[TMP34:%.*]] = shl <2 x i64> [[STEP_ADD_7]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP35:%.*]] = shl <2 x i64> [[TMP19]], [[TMP27]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = shl <2 x i64> [[TMP20]], [[TMP28]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = shl <2 x i64> [[TMP21]], [[TMP29]]
@@ -91,7 +91,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
 ; CHECK-NEXT:    [[TMP49]] = or <2 x i64> [[TMP41]], [[VEC_PHI16]]
 ; CHECK-NEXT:    [[TMP50]] = or <2 x i64> [[TMP42]], [[VEC_PHI17]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD9]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_7]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
index a6797dea0836c2..ba9d49fc682c4c 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll
@@ -372,14 +372,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; VF-TWO-CHECK-NEXT:    [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; VF-TWO-CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4
 ; VF-TWO-CHECK-NEXT:    [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; VF-TWO-CHECK-NEXT:    [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-TWO-CHECK-NEXT:    [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; VF-TWO-CHECK-NEXT:    [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00)
+; VF-TWO-CHECK-NEXT:    [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00)
 ; VF-TWO-CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
 ; VF-TWO-CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0
 ; VF-TWO-CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4
@@ -427,7 +427,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; VF-TWO-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1
 ; VF-TWO-CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4
 ; VF-TWO-CHECK-NEXT:    [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
-; VF-TWO-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], <float 1.000000e+00, float 1.000000e+00>
+; VF-TWO-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00)
 ; VF-TWO-CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]]
 ; VF-TWO-CHECK-NEXT:    [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0
 ; VF-TWO-CHECK-NEXT:    store <2 x float> [[TMP105]], ptr [[TMP107]], align 4
@@ -531,14 +531,14 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; VF-FOUR-CHECK-NEXT:    [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; VF-FOUR-CHECK-NEXT:    [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4
 ; VF-FOUR-CHECK-NEXT:    [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; VF-FOUR-CHECK-NEXT:    [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; VF-FOUR-CHECK-NEXT:    [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; VF-FOUR-CHECK-NEXT:    [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00)
+; VF-FOUR-CHECK-NEXT:    [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00)
 ; VF-FOUR-CHECK-NEXT:    [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0
 ; VF-FOUR-CHECK-NEXT:    [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4
@@ -586,7 +586,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; VF-FOUR-CHECK-NEXT:    [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3
 ; VF-FOUR-CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4
 ; VF-FOUR-CHECK-NEXT:    [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; VF-FOUR-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; VF-FOUR-CHECK-NEXT:    [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00)
 ; VF-FOUR-CHECK-NEXT:    [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]]
 ; VF-FOUR-CHECK-NEXT:    [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0
 ; VF-FOUR-CHECK-NEXT:    store <4 x float> [[TMP105]], ptr [[TMP107]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 0e55ad65cdb2c2..8e24b51f1083dd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -425,7 +425,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 4, i64 8, i64 12, i64 16, i64 20, i64 24, i64 28>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
@@ -444,7 +444,7 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
index 6c8e3606c53f2f..ab62addb94b9f5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll
@@ -486,8 +486,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
 ; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
 ; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
-; FIXED-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
 ; FIXED-NEXT:    [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP5]]
 ; FIXED-NEXT:    [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]]
 ; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]]
@@ -614,8 +614,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
 ; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
 ; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
-; FIXED-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1)
 ; FIXED-NEXT:    [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP5]]
 ; FIXED-NEXT:    [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP6]]
 ; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD]]
@@ -736,10 +736,10 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
 ; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
-; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
-; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], <i64 42, i64 42, i64 42, i64 42>
-; FIXED-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], <i64 27, i64 27, i64 27, i64 27>
-; FIXED-NEXT:    [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42)
+; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
+; FIXED-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], splat (i64 27)
+; FIXED-NEXT:    [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27)
 ; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]]
 ; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]]
 ; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8
@@ -858,10 +858,10 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
 ; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
-; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
-; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], <i64 42, i64 42, i64 42, i64 42>
-; FIXED-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], <i64 27, i64 27, i64 27, i64 27>
-; FIXED-NEXT:    [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], splat (i64 42)
+; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], splat (i64 42)
+; FIXED-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], splat (i64 27)
+; FIXED-NEXT:    [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], splat (i64 27)
 ; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[TMP6]], <4 x i64> [[WIDE_LOAD]]
 ; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP5]], <4 x i64> [[TMP7]], <4 x i64> [[WIDE_LOAD1]]
 ; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP2]], align 8
@@ -981,10 +981,10 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32
 ; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
 ; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1
-; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-; FIXED-NEXT:    [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; FIXED-NEXT:    [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; FIXED-NEXT:    [[TMP4:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], splat (i8 -128)
+; FIXED-NEXT:    [[TMP5:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], splat (i8 -128)
+; FIXED-NEXT:    [[TMP6:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
+; FIXED-NEXT:    [[TMP7:%.*]] = select <32 x i1> [[TMP5]], <32 x i8> splat (i8 -1), <32 x i8> splat (i8 1)
 ; FIXED-NEXT:    [[TMP8:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP6]]
 ; FIXED-NEXT:    [[TMP9:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP7]]
 ; FIXED-NEXT:    [[PREDPHI:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> [[TMP8]], <32 x i8> [[WIDE_LOAD]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index 2789ab484e1b66..ad10ddc26fc7a5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -71,8 +71,8 @@ define void @load_store_factor2_i32(ptr %p) {
 ; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
 ; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; FIXED-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 4
@@ -248,8 +248,8 @@ define void @load_store_factor2_i64(ptr %p) {
 ; FIXED-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP2]], align 8
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
-; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
 ; FIXED-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; FIXED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; FIXED-NEXT:    store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP2]], align 8
@@ -372,9 +372,9 @@ define void @load_store_factor3_i32(ptr %p) {
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -425,9 +425,9 @@ define void @load_store_factor3_i32(ptr %p) {
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; FIXED-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
 ; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -478,9 +478,9 @@ define void @load_store_factor3_i32(ptr %p) {
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21>
 ; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 1, i32 4, i32 7, i32 10, i32 13, i32 16, i32 19, i32 22>
 ; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <24 x i32> [[WIDE_VEC]], <24 x i32> poison, <8 x i32> <i32 2, i32 5, i32 8, i32 11, i32 14, i32 17, i32 20, i32 23>
-; SCALABLE-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; SCALABLE-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], splat (i32 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <8 x i32> [[STRIDED_VEC1]], splat (i32 2)
+; SCALABLE-NEXT:    [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], splat (i32 3)
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <24 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -562,9 +562,9 @@ define void @load_store_factor3_i64(ptr %p) {
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -615,9 +615,9 @@ define void @load_store_factor3_i64(ptr %p) {
 ; FIXED-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; FIXED-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; FIXED-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
-; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
-; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], <i64 3, i64 3, i64 3, i64 3>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
 ; FIXED-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; FIXED-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
 ; FIXED-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -668,9 +668,9 @@ define void @load_store_factor3_i64(ptr %p) {
 ; SCALABLE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; SCALABLE-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; SCALABLE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i64> [[WIDE_VEC]], <12 x i64> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; SCALABLE-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], <i64 1, i64 1, i64 1, i64 1>
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], <i64 2, i64 2, i64 2, i64 2>
-; SCALABLE-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], <i64 3, i64 3, i64 3, i64 3>
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT:    [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], splat (i64 3)
 ; SCALABLE-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SCALABLE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
 ; SCALABLE-NEXT:    [[TMP13:%.*]] = shufflevector <8 x i64> [[TMP11]], <8 x i64> [[TMP12]], <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -757,14 +757,14 @@ define void @load_store_factor8(ptr %p) {
 ; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
 ; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
 ; CHECK-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], <i64 3, i64 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], <i64 4, i64 4>
-; CHECK-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], <i64 5, i64 5>
-; CHECK-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], <i64 6, i64 6>
-; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], <i64 7, i64 7>
-; CHECK-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], <i64 8, i64 8>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; CHECK-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; CHECK-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; CHECK-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; CHECK-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -849,14 +849,14 @@ define void @load_store_factor8(ptr %p) {
 ; FIXED-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
 ; FIXED-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
 ; FIXED-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
-; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], <i64 1, i64 1>
-; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], <i64 2, i64 2>
-; FIXED-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], <i64 3, i64 3>
-; FIXED-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], <i64 4, i64 4>
-; FIXED-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], <i64 5, i64 5>
-; FIXED-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], <i64 6, i64 6>
-; FIXED-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], <i64 7, i64 7>
-; FIXED-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], <i64 8, i64 8>
+; FIXED-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; FIXED-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; FIXED-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; FIXED-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; FIXED-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; FIXED-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; FIXED-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; FIXED-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
 ; FIXED-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; FIXED-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; FIXED-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -941,14 +941,14 @@ define void @load_store_factor8(ptr %p) {
 ; SCALABLE-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 5, i32 13>
 ; SCALABLE-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 6, i32 14>
 ; SCALABLE-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <16 x i64> [[WIDE_VEC]], <16 x i64> poison, <2 x i32> <i32 7, i32 15>
-; SCALABLE-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], <i64 1, i64 1>
-; SCALABLE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], <i64 2, i64 2>
-; SCALABLE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], <i64 3, i64 3>
-; SCALABLE-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], <i64 4, i64 4>
-; SCALABLE-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], <i64 5, i64 5>
-; SCALABLE-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], <i64 6, i64 6>
-; SCALABLE-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], <i64 7, i64 7>
-; SCALABLE-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], <i64 8, i64 8>
+; SCALABLE-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[STRIDED_VEC]], splat (i64 1)
+; SCALABLE-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[STRIDED_VEC1]], splat (i64 2)
+; SCALABLE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[STRIDED_VEC2]], splat (i64 3)
+; SCALABLE-NEXT:    [[TMP10:%.*]] = add <2 x i64> [[STRIDED_VEC3]], splat (i64 4)
+; SCALABLE-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[STRIDED_VEC4]], splat (i64 5)
+; SCALABLE-NEXT:    [[TMP14:%.*]] = add <2 x i64> [[STRIDED_VEC5]], splat (i64 6)
+; SCALABLE-NEXT:    [[TMP16:%.*]] = add <2 x i64> [[STRIDED_VEC6]], splat (i64 7)
+; SCALABLE-NEXT:    [[TMP19:%.*]] = add <2 x i64> [[STRIDED_VEC7]], splat (i64 8)
 ; SCALABLE-NEXT:    [[TMP21:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SCALABLE-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; SCALABLE-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i64> [[TMP12]], <2 x i64> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index ec50b0cac03829..07a5c53894cdec 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -261,7 +261,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <16 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
@@ -322,7 +322,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <32 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
@@ -382,7 +382,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
-; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <8 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index f63b1b73dc522b..02b7ce3795ad52 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -9,8 +9,8 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], <i64 48, i64 48, i64 48, i64 48>
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], <i64 52, i64 52, i64 52, i64 52>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
@@ -21,10 +21,10 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], <i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
@@ -54,7 +54,7 @@ define void @test(ptr %p, i64 %a, i8 %b) {
 ; CHECK-NEXT:    store i8 [[TMP16]], ptr [[P]], align 1
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
 ; CHECK:       pred.store.continue8:
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
index 06a4f98d3dc726..fadaf54255a77d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll
@@ -379,10 +379,10 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; FIXEDLEN:       vector.body:
 ; FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXEDLEN-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; FIXEDLEN-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; FIXEDLEN-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
-; FIXEDLEN-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], <i64 10, i64 10, i64 10, i64 10>
+; FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
+; FIXEDLEN-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10)
 ; FIXEDLEN-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison)
 ; FIXEDLEN-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
 ; FIXEDLEN-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
@@ -393,7 +393,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; FIXEDLEN-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
 ; FIXEDLEN-NEXT:    store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
 ; FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
+; FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
 ; FIXEDLEN-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXEDLEN-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; FIXEDLEN:       middle.block:
@@ -490,7 +490,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-FIXEDLEN-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; TF-FIXEDLEN-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
-; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
+; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison)
 ; TF-FIXEDLEN-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer
@@ -498,7 +498,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca
 ; TF-FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; TF-FIXEDLEN-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; TF-FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; TF-FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; TF-FIXEDLEN-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
 ; TF-FIXEDLEN-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; TF-FIXEDLEN:       middle.block:
@@ -1184,10 +1184,10 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; FIXEDLEN:       vector.body:
 ; FIXEDLEN-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXEDLEN-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; FIXEDLEN-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; FIXEDLEN-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
-; FIXEDLEN-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], <i64 10, i64 10, i64 10, i64 10>
+; FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
+; FIXEDLEN-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], splat (i64 10)
 ; FIXEDLEN-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]])
 ; FIXEDLEN-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
 ; FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
@@ -1196,7 +1196,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; FIXEDLEN-NEXT:    store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
 ; FIXEDLEN-NEXT:    store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8
 ; FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
+; FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
 ; FIXEDLEN-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXEDLEN-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; FIXEDLEN:       middle.block:
@@ -1294,14 +1294,14 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc
 ; TF-FIXEDLEN-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; TF-FIXEDLEN-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; TF-FIXEDLEN-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025)
-; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], <i64 10, i64 10, i64 10, i64 10>
+; TF-FIXEDLEN-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], splat (i64 10)
 ; TF-FIXEDLEN-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
 ; TF-FIXEDLEN-NEXT:    call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]])
 ; TF-FIXEDLEN-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; TF-FIXEDLEN-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; TF-FIXEDLEN-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]])
 ; TF-FIXEDLEN-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; TF-FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; TF-FIXEDLEN-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; TF-FIXEDLEN-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028
 ; TF-FIXEDLEN-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; TF-FIXEDLEN:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index f82611f414de2d..4329afdb757f80 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -115,12 +115,12 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; NO-VP:       vector.ph:
 ; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
 ; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i32 [[START:%.*]], i32 0
+; NO-VP-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> splat (i32 1), i32 [[START:%.*]], i32 0
 ; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; NO-VP:       vector.body:
 ; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i32> [ <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
@@ -864,12 +864,12 @@ define float @fmul(ptr %a, i64 %n, float %start) {
 ; NO-VP:       vector.ph:
 ; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
 ; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; NO-VP-NEXT:    [[TMP0:%.*]] = insertelement <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float [[START:%.*]], i32 0
+; NO-VP-NEXT:    [[TMP0:%.*]] = insertelement <8 x float> splat (float 1.000000e+00), float [[START:%.*]], i32 0
 ; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; NO-VP:       vector.body:
 ; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
 ; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP1]]
 ; NO-VP-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
index 2dd47d5c1ea8a7..ea064c75309857 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-safe-dep-distance.ll
@@ -429,7 +429,7 @@ define void @no_high_lmul_or_interleave(ptr %p) {
 ; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; IF-EVL-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 3001, i64 3001, i64 3001, i64 3001>
+; IF-EVL-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 3001)
 ; IF-EVL-NEXT:    [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]]
 ; IF-EVL-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
 ; IF-EVL-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[TMP3]], i32 32, <4 x i1> [[TMP1]], <4 x i64> poison)
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
index 3477c8d879106b..107ae504612968 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll
@@ -16,7 +16,7 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], <i64 8, i64 8>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
index 5e8f287217478b..69c31fd6d731dc 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/pr47665.ll
@@ -23,7 +23,7 @@ define void @test(ptr %p, i40 %a) {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT2]], <16 x i32> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <16 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <16 x i32> [[VEC_IV]], splat (i32 9)
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <16 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
index 9a1e613a736bf3..699b8487a065a5 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/predicated-first-order-recurrence.ll
@@ -22,7 +22,7 @@ define void @func_21() {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -59,7 +59,7 @@ define void @func_21() {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
 ; CHECK:       pred.store.continue4:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -78,7 +78,7 @@ define void @func_21() {
 ; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], ptr [[B_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 5
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
index 0418f93498bc20..3363aea39e73f6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -30,7 +30,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <32 x i8> [[VEC_IND]], ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i8> [[VEC_IND]], splat (i8 32)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -61,7 +61,7 @@ define i32 @conversion_cost1(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    store <16 x i8> [[VEC_IND9]], ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT12]] = add nuw i64 [[INDEX8]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:    [[VEC_IND_NEXT10]] = add <16 x i8> [[VEC_IND9]], splat (i8 16)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT12]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
@@ -118,15 +118,15 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 9, i64 10>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <2 x i64> [[STEP_ADD1]], <i64 2, i64 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <2 x i64> [[STEP_ADD_2]], splat (i64 2)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 9, [[INDEX]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], <i64 3, i64 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD1]], <i64 3, i64 3>
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD2]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i64> [[VEC_IND]], splat (i64 3)
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i64> [[STEP_ADD]], splat (i64 3)
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[STEP_ADD_2]], splat (i64 3)
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[STEP_ADD_3]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP12:%.*]] = sitofp <2 x i64> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP13:%.*]] = sitofp <2 x i64> [[TMP9]] to <2 x float>
 ; CHECK-NEXT:    [[TMP14:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float>
@@ -141,7 +141,7 @@ define i32 @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwin
 ; CHECK-NEXT:    store <2 x float> [[TMP14]], ptr [[TMP22]], align 4
 ; CHECK-NEXT:    store <2 x float> [[TMP15]], ptr [[TMP23]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD2]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD_3]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
index 6060a7318deb64..8b47aee6bf389b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll
@@ -15,10 +15,10 @@ define i64 @test_foldable_live_in_via_scev() {
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0]] = mul <2 x i64> [[VEC_PHI]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 1), %[[VECTOR_PH]] ], [ [[TMP1:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0]] = mul <2 x i64> [[VEC_PHI]], splat (i64 2)
+; CHECK-NEXT:    [[TMP1]] = mul <2 x i64> [[VEC_PHI1]], splat (i64 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
 ; CHECK-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -73,8 +73,8 @@ define i64 @second_lshr_operand_zero_via_scev() {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer
@@ -88,8 +88,8 @@ define i64 @second_lshr_operand_zero_via_scev() {
 ; CHECK-NEXT:    [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[STEP_ADD4]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
index 8e5bf27acc64f1..121f2b306de6da 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -782,10 +782,10 @@ define i64 @cost_assume(ptr %end, i64 %N) {
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP7]] = add <2 x i64> [[VEC_PHI]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP8]] = add <2 x i64> [[VEC_PHI2]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[VEC_PHI3]], <i64 1, i64 1>
-; CHECK-NEXT:    [[TMP10]] = add <2 x i64> [[VEC_PHI4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP7]] = add <2 x i64> [[VEC_PHI]], splat (i64 1)
+; CHECK-NEXT:    [[TMP8]] = add <2 x i64> [[VEC_PHI2]], splat (i64 1)
+; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[VEC_PHI3]], splat (i64 1)
+; CHECK-NEXT:    [[TMP10]] = add <2 x i64> [[VEC_PHI4]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP11]])
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP11]])
@@ -851,13 +851,13 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[X:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext <4 x i1> [[BROADCAST_SPLAT]] to <4 x i64>
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[TMP0]], splat (i64 12)
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP5]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
@@ -936,7 +936,7 @@ define i64 @live_in_known_1_via_scev() {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], <i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[VEC_PHI]], <4 x i64> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
@@ -998,7 +998,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
index 4c3c2e89cd204b..c861aa8172b9b8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/divs-with-tail-folding.ll
@@ -156,7 +156,7 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
 ; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> zeroinitializer, ptr [[TMP29]], i32 8, <4 x i1> [[TMP8]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP30]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
index f3875ccb41668c..9092da6092595a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll
@@ -40,15 +40,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[OFFSET_IDX]] to i32
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], -1
-; CHECK-NEXT:    [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
-; CHECK-NEXT:    [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP13:%.*]] = mul <16 x i32> [[VEC_IND]], splat (i32 196608)
+; CHECK-NEXT:    [[TMP14:%.*]] = lshr exact <16 x i32> [[TMP13]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP15:%.*]] = trunc <16 x i32> [[TMP14]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP12]] to i64
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i16, ptr [[ARR:%.*]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i16, ptr [[TMP17]], i32 0
 ; CHECK-NEXT:    store <16 x i16> [[TMP15]], ptr [[TMP18]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -77,15 +77,15 @@ define void @test_pr59459(i64 %iv.start, ptr %arr) {
 ; CHECK-NEXT:    [[TMP21:%.*]] = trunc i64 [[OFFSET_IDX15]] to i32
 ; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[TMP21]], 0
 ; CHECK-NEXT:    [[TMP23:%.*]] = add i32 [[TMP22]], -1
-; CHECK-NEXT:    [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], <i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608, i32 196608>
-; CHECK-NEXT:    [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP24:%.*]] = mul <8 x i32> [[VEC_IND13]], splat (i32 196608)
+; CHECK-NEXT:    [[TMP25:%.*]] = lshr exact <8 x i32> [[TMP24]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP26:%.*]] = trunc <8 x i32> [[TMP25]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP23]] to i64
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i16, ptr [[ARR]], i64 [[TMP27]]
 ; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i16, ptr [[TMP28]], i32 0
 ; CHECK-NEXT:    store <8 x i16> [[TMP26]], ptr [[TMP29]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT16]] = add nuw i64 [[INDEX9]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT14]] = add <8 x i32> [[VEC_IND13]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC4]]
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
@@ -145,92 +145,90 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[L]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = mul <16 x i16> <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>, [[TMP2]]
-
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <16 x i16> splat (i16 16), [[TMP2]]
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i16> [[DOTSPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
-
+; CHECK-NEXT:    [[TMP21:%.*]] = mul <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, [[DOTSPLAT]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i16> zeroinitializer, [[TMP21]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i16> [[BROADCAST_SPLATINSERT2]], <16 x i16> poison, <16 x i32> zeroinitializer
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i16
 ; CHECK-NEXT:    [[IND_END:%.*]] = mul i16 [[DOTCAST]], [[TMP0]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i16> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[DOTSPLAT3]]
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <16 x i16> [[STEP_ADD]], [[DOTSPLAT3]]
-; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <16 x i16> [[STEP_ADD4]], [[DOTSPLAT3]]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <16 x i16> [[VEC_IND]], [[TMP1]]
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <16 x i16> [[STEP_ADD]], [[TMP1]]
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <16 x i16> [[STEP_ADD_2]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP7:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP8:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP9:%.*]] = sub <16 x i16> [[STEP_ADD4]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP10:%.*]] = sub <16 x i16> [[STEP_ADD5]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 16
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 32
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 48
-; CHECK-NEXT:    store <16 x i16> [[TMP7]], ptr [[TMP15]], align 2
-; CHECK-NEXT:    store <16 x i16> [[TMP8]], ptr [[TMP16]], align 2
-; CHECK-NEXT:    store <16 x i16> [[TMP9]], ptr [[TMP17]], align 2
-; CHECK-NEXT:    store <16 x i16> [[TMP10]], ptr [[TMP18]], align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <16 x i16> [[VEC_IND]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = sub <16 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48
+; CHECK-NEXT:    store <16 x i16> [[TMP4]], ptr [[TMP9]], align 2
+; CHECK-NEXT:    store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2
+; CHECK-NEXT:    store <16 x i16> [[TMP6]], ptr [[TMP11]], align 2
+; CHECK-NEXT:    store <16 x i16> [[TMP7]], ptr [[TMP12]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD5]], [[DOTSPLAT3]]
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i16> [[STEP_ADD_3]], [[TMP1]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[L]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK:       vec.epilog.iter.check:
-; CHECK-NEXT:    [[DOTCAST12:%.*]] = trunc i64 [[N_VEC]] to i16
-; CHECK-NEXT:    [[IND_END13:%.*]] = mul i16 [[DOTCAST12]], [[TMP0]]
+; CHECK-NEXT:    [[DOTCAST9:%.*]] = trunc i64 [[N_VEC]] to i16
+; CHECK-NEXT:    [[IND_END10:%.*]] = mul i16 [[DOTCAST9]], [[TMP0]]
 ; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[L]], [[N_VEC]]
 ; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
 ; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
 ; CHECK:       vec.epilog.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT:    [[N_MOD_VF7:%.*]] = urem i64 [[L]], 8
-; CHECK-NEXT:    [[N_VEC8:%.*]] = sub i64 [[L]], [[N_MOD_VF7]]
-; CHECK-NEXT:    [[DOTCAST10:%.*]] = trunc i64 [[N_VEC8]] to i16
-; CHECK-NEXT:    [[IND_END11:%.*]] = mul i16 [[DOTCAST10]], [[TMP0]]
-; CHECK-NEXT:    [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT18:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT17]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[DOTSPLATINSERT19:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT20:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT19]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT20]]
-; CHECK-NEXT:    [[INDUCTION21:%.*]] = add <8 x i16> [[DOTSPLAT18]], [[TMP20]]
-; CHECK-NEXT:    [[TMP21:%.*]] = mul i16 [[TMP0]], 8
-; CHECK-NEXT:    [[DOTSPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[TMP21]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT23:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT27:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT28:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT27]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[N_MOD_VF4:%.*]] = urem i64 [[L]], 8
+; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[L]], [[N_MOD_VF4]]
+; CHECK-NEXT:    [[DOTCAST7:%.*]] = trunc i64 [[N_VEC5]] to i16
+; CHECK-NEXT:    [[IND_END8:%.*]] = mul i16 [[DOTCAST7]], [[TMP0]]
+; CHECK-NEXT:    [[DOTSPLATINSERT13:%.*]] = insertelement <8 x i16> poison, i16 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT14:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT13]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLATINSERT15:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT16:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT15]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP14:%.*]] = mul <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[DOTSPLAT16]]
+; CHECK-NEXT:    [[INDUCTION17:%.*]] = add <8 x i16> [[DOTSPLAT14]], [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i16 [[TMP0]], 8
+; CHECK-NEXT:    [[DOTSPLATINSERT18:%.*]] = insertelement <8 x i16> poison, i16 [[TMP15]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT19:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT18]], <8 x i16> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i16> poison, i16 [[OFF]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT22]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
-; CHECK-NEXT:    [[INDEX16:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND24:%.*]] = phi <8 x i16> [ [[INDUCTION21]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT26:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX16]], 0
-; CHECK-NEXT:    [[TMP23:%.*]] = sub <8 x i16> [[VEC_IND24]], [[BROADCAST_SPLAT28]]
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP22]]
-; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i16, ptr [[TMP24]], i32 0
-; CHECK-NEXT:    store <8 x i16> [[TMP23]], ptr [[TMP25]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT29]] = add nuw i64 [[INDEX16]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT26]] = add <8 x i16> [[VEC_IND24]], [[DOTSPLAT23]]
-; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT29]], [[N_VEC8]]
-; CHECK-NEXT:    br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT24:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND20:%.*]] = phi <8 x i16> [ [[INDUCTION17]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[INDEX12]], 0
+; CHECK-NEXT:    [[TMP17:%.*]] = sub <8 x i16> [[VEC_IND20]], [[BROADCAST_SPLAT23]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
+; CHECK-NEXT:    store <8 x i16> [[TMP17]], ptr [[TMP19]], align 2
+; CHECK-NEXT:    [[INDEX_NEXT24]] = add nuw i64 [[INDEX12]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT21]] = add <8 x i16> [[VEC_IND20]], [[DOTSPLAT19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[TMP20]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
-; CHECK-NEXT:    [[CMP_N15:%.*]] = icmp eq i64 [[L]], [[N_VEC8]]
-; CHECK-NEXT:    br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT:    [[CMP_N25:%.*]] = icmp eq i64 [[L]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[CMP_N25]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK:       vec.epilog.scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL14:%.*]] = phi i16 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL11:%.*]] = phi i16 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END10]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[P_09:%.*]] = phi i16 [ [[BC_RESUME_VAL11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[ADD]] = sub i16 [[P_09]], [[OFF]]
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[K]], i64 [[IV]]
 ; CHECK-NEXT:    store i16 [[ADD]], ptr [[ARRAYIDX3]], align 2
@@ -240,6 +238,8 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l,
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
+
+
 entry:
   br label %loop
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
index dcdc723b4b9b92..49b3df1a97e253 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll
@@ -225,12 +225,12 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP1]] = sub nsw <2 x i64> zeroinitializer, [[STEP_ADD]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP2]], ptr [[GEP]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 36
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index 54dd9c870a1709..39fa87fbb569f6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -32,9 +32,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00, float 3.000000e+00, float 3.500000e+00, float 4.000000e+00, float 4.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
-; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
-; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], <float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01, float 1.200000e+01>
+; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 4.000000e+00)
+; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 8.000000e+00)
+; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.200000e+01)
 ; AUTO_VEC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
 ; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 64
@@ -44,7 +44,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD2]], ptr [[TMP3]], align 4
 ; AUTO_VEC-NEXT:    store <8 x float> [[STEP_ADD3]], ptr [[TMP4]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], <float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01, float 1.600000e+01>
+; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd fast <8 x float> [[VEC_IND]], splat (float 1.600000e+01)
 ; AUTO_VEC-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; AUTO_VEC:       middle.block:
@@ -207,9 +207,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
-; AUTO_VEC-NEXT:    [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
-; AUTO_VEC-NEXT:    [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
+; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 1.200000e+01)
+; AUTO_VEC-NEXT:    [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 2.400000e+01)
+; AUTO_VEC-NEXT:    [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], splat (double 3.600000e+01)
 ; AUTO_VEC-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
 ; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
@@ -219,7 +219,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
 ; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8
 ; AUTO_VEC-NEXT:    store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
+; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], splat (double 4.800000e+01)
 ; AUTO_VEC-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; AUTO_VEC:       middle.block:
@@ -371,9 +371,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC:       vector.body:
 ; AUTO_VEC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AUTO_VEC-NEXT:    [[VEC_IND:%.*]] = phi <8 x float> [ <float 1.000000e+00, float 4.300000e+01, float 8.500000e+01, float 1.270000e+02, float 1.690000e+02, float 2.110000e+02, float 2.530000e+02, float 2.950000e+02>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
-; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT:    [[STEP_ADD:%.*]] = fadd reassoc <8 x float> [[VEC_IND]], splat (float 3.360000e+02)
+; AUTO_VEC-NEXT:    [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02)
+; AUTO_VEC-NEXT:    [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02)
 ; AUTO_VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P:%.*]], i64 [[INDEX]]
 ; AUTO_VEC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 32
 ; AUTO_VEC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 64
@@ -391,7 +391,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) {
 ; AUTO_VEC-NEXT:    store <8 x float> [[TMP8]], ptr [[TMP4]], align 4
 ; AUTO_VEC-NEXT:    store <8 x float> [[TMP9]], ptr [[TMP5]], align 4
 ; AUTO_VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], <float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02, float 3.360000e+02>
+; AUTO_VEC-NEXT:    [[VEC_IND_NEXT]] = fadd reassoc <8 x float> [[STEP_ADD3]], splat (float 3.360000e+02)
 ; AUTO_VEC-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AUTO_VEC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; AUTO_VEC:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index 6516b05ab4ede9..9c234c282311e3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -38,7 +38,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; AVX512-NEXT:    [[TMP6:%.*]] = sext <16 x i32> [[WIDE_MASKED_LOAD]] to <16 x i64>
 ; AVX512-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <16 x i64> [[TMP6]]
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP7]], i32 4, <16 x i1> [[TMP3]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP8:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]]
 ; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
 ; AVX512-NEXT:    call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP8]], ptr [[TMP10]], i32 4, <16 x i1> [[TMP3]])
@@ -64,7 +64,7 @@ define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; FVW2-NEXT:    [[TMP6:%.*]] = sext <2 x i32> [[WIDE_MASKED_LOAD]] to <2 x i64>
 ; FVW2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], <2 x i64> [[TMP6]]
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP7]], i32 4, <2 x i1> [[TMP3]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[OUT:%.*]], i64 [[TMP0]]
 ; FVW2-NEXT:    [[TMP10:%.*]] = getelementptr float, ptr [[TMP9]], i32 0
 ; FVW2-NEXT:    call void @llvm.masked.store.v2f32.p0(<2 x float> [[TMP8]], ptr [[TMP10]], i32 4, <2 x i1> [[TMP3]])
@@ -128,11 +128,11 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
 ; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256)
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; AVX512-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; AVX512:       for.end:
@@ -156,7 +156,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
@@ -174,7 +174,7 @@ define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr n
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
-; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 32, i64 32>
+; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32)
 ; FVW2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT:    br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; FVW2:       for.end:
@@ -235,11 +235,11 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
 ; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER1]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_OUT:%.*]], ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256)
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; AVX512-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; AVX512:       for.end:
@@ -263,7 +263,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
@@ -281,7 +281,7 @@ define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; FVW2:       pred.store.continue2:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 32, i64 32>
+; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32)
 ; FVW2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT:    br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; FVW2:       for.end:
@@ -329,11 +329,11 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
 ; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256)
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; AVX512-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; AVX512:       for.end:
@@ -357,7 +357,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
@@ -375,7 +375,7 @@ define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noali
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
-; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 32, i64 32>
+; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32)
 ; FVW2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT:    br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; FVW2:       for.end:
@@ -422,11 +422,11 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
 ; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p1(<16 x ptr addrspace(1)> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], <16 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[TMP3]], <16 x ptr> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256)
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; AVX512-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; AVX512:       for.end:
@@ -450,7 +450,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr addrspace(1) [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p1(<2 x ptr addrspace(1)> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
@@ -468,7 +468,7 @@ define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noal
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
-; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 32, i64 32>
+; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32)
 ; FVW2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT:    br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; FVW2:       for.end:
@@ -515,11 +515,11 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
 ; AVX512-NEXT:    [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
 ; AVX512-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <16 x i64> [[VEC_IND]], i32 1
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> [[TMP2]], i32 4, <16 x i1> [[TMP1]], <16 x float> poison)
-; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+; AVX512-NEXT:    [[TMP3:%.*]] = fadd <16 x float> [[WIDE_MASKED_GATHER2]], splat (float 5.000000e-01)
 ; AVX512-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[OUT:%.*]], <16 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v16f32.v16p1(<16 x float> [[TMP3]], <16 x ptr addrspace(1)> [[TMP4]], i32 4, <16 x i1> [[TMP1]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 16
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256, i64 256>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 256)
 ; AVX512-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; AVX512-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; AVX512:       for.end:
@@ -543,7 +543,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
 ; FVW2-NEXT:    [[TMP8:%.*]] = icmp sgt <2 x i32> [[TMP7]], zeroinitializer
 ; FVW2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_IN:%.*]], ptr [[IN:%.*]], <2 x i64> [[VEC_IND]], i32 1
 ; FVW2-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> [[TMP9]], i32 4, <2 x i1> [[TMP8]], <2 x float> poison)
-; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], <float 5.000000e-01, float 5.000000e-01>
+; FVW2-NEXT:    [[TMP10:%.*]] = fadd <2 x float> [[WIDE_MASKED_GATHER]], splat (float 5.000000e-01)
 ; FVW2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
 ; FVW2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; FVW2:       pred.store.if:
@@ -561,7 +561,7 @@ define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noal
 ; FVW2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; FVW2:       pred.store.continue3:
 ; FVW2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
-; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 32, i64 32>
+; FVW2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 32)
 ; FVW2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; FVW2-NEXT:    br i1 [[TMP17]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; FVW2:       for.end:
@@ -691,12 +691,12 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt
 ; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP27]]
 ; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]]
 ; AVX512-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0
-; AVX512-NEXT:    [[WIDE_LOAD22:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]]
-; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD22]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]]
+; AVX512-NEXT:    [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP30]], align 4, !alias.scope [[META17:![0-9]+]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP26]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope [[META20:![0-9]+]], !noalias [[META22:![0-9]+]]
 ; AVX512-NEXT:    [[TMP31:%.*]] = getelementptr float, ptr [[TMP28]], i32 0
-; AVX512-NEXT:    [[WIDE_LOAD23:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]]
+; AVX512-NEXT:    [[WIDE_LOAD24:%.*]] = load <8 x float>, ptr [[TMP31]], align 4, !alias.scope [[META24:![0-9]+]]
 ; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1
-; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD23]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope [[META20]], !noalias [[META22]]
+; AVX512-NEXT:    call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD24]], <8 x ptr> [[TMP32]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>), !alias.scope [[META20]], !noalias [[META22]]
 ; AVX512-NEXT:    [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8
 ; AVX512-NEXT:    [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512
 ; AVX512-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
index 637b985b4562ed..a8a115d5b02118 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gep-use-outside-loop.ll
@@ -18,7 +18,7 @@ define void @gep_use_in_dead_block(ptr noalias %dst, ptr %src) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 [[TMP1]]
@@ -97,13 +97,13 @@ define void @gep_use_outside_loop(ptr noalias %dst, ptr %src) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i16, ptr [[SRC]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i16> [[WIDE_LOAD]], splat (i16 10)
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i16.p0(<4 x i16> zeroinitializer, ptr [[TMP7]], i32 2, <4 x i1> [[TMP5]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
 ; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
index 8200e7df2a2602..5ba559af077ca6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -31,9 +31,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH_US:%.*]]
 ; CHECK:       for.end.us:
 ; CHECK-NEXT:    [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDVARS_IV33:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[ADD10_US:%.*]] = add nsw i32 [[TMP3]], 3
-; CHECK-NEXT:    store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    store i32 [[ADD10_US]], ptr [[ARRAYIDX9_US]], align 4, !llvm.mem.parallel_loop_access [[META0]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT34:%.*]] = add i64 [[INDVARS_IV33]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV35:%.*]] = trunc i64 [[INDVARS_IV_NEXT34]] to i32
 ; CHECK-NEXT:    [[EXITCOND36:%.*]] = icmp eq i32 [[LFTR_WIDEIV35]], [[M]]
@@ -44,9 +44,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
 ; CHECK-NEXT:    [[ADD4_US:%.*]] = add i32 [[ADD_US:%.*]], [[TMP4]]
 ; CHECK-NEXT:    [[IDXPROM_US:%.*]] = sext i32 [[ADD4_US]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4, !llvm.mem.parallel_loop_access [[META0]]
 ; CHECK-NEXT:    [[ADD5_US:%.*]] = add nsw i32 [[TMP5]], 1
-; CHECK-NEXT:    store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    store i32 [[ADD5_US]], ptr [[ARRAYIDX7_US:%.*]], align 4, !llvm.mem.parallel_loop_access [[META0]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT30]] = add i64 [[INDVARS_IV29]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV31:%.*]] = trunc i64 [[INDVARS_IV_NEXT30]] to i32
 ; CHECK-NEXT:    [[EXITCOND32:%.*]] = icmp eq i32 [[LFTR_WIDEIV31]], [[M]]
@@ -77,9 +77,9 @@ define void @foo(ptr nocapture %a, ptr nocapture %b, i32 %k, i32 %m) #0 {
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP16]], align 4
-; CHECK-NEXT:    [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP17:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i32> [[TMP17]], i32 3
-; CHECK-NEXT:    store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NEXT:    store i32 [[TMP18]], ptr [[ARRAYIDX7_US]], align 4, !llvm.mem.parallel_loop_access [[META0]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[OFFSET_IDX]], 4
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
index 0c2ec268ad2070..2dac60a0fcfcba 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
@@ -80,8 +80,8 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2
 ; SSE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP4]], align 8
 ; SSE-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], <double 4.200000e+01, double 4.200000e+01>
-; SSE-NEXT:    [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], <double 4.200000e+01, double 4.200000e+01>
+; SSE-NEXT:    [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01)
+; SSE-NEXT:    [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], splat (double 4.200000e+01)
 ; SSE-NEXT:    [[TMP8:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]]
 ; SSE-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]]
 ; SSE-NEXT:    [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP8]], <2 x double> [[VEC_PHI]]
@@ -139,10 +139,10 @@ define double @sumIfVector(ptr nocapture readonly %arr) {
 ; AVX-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8
 ; AVX-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8
 ; AVX-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP11]], align 8
-; AVX-NEXT:    [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], <double 4.200000e+01, double 4.200000e+01, double 4.200000e+01, double 4.200000e+01>
-; AVX-NEXT:    [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], <double 4.200000e+01, double 4.200000e+01, double 4.200000e+01, double 4.200000e+01>
-; AVX-NEXT:    [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], <double 4.200000e+01, double 4.200000e+01, double 4.200000e+01, double 4.200000e+01>
-; AVX-NEXT:    [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], <double 4.200000e+01, double 4.200000e+01, double 4.200000e+01, double 4.200000e+01>
+; AVX-NEXT:    [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], splat (double 4.200000e+01)
+; AVX-NEXT:    [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], splat (double 4.200000e+01)
+; AVX-NEXT:    [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], splat (double 4.200000e+01)
+; AVX-NEXT:    [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], splat (double 4.200000e+01)
 ; AVX-NEXT:    [[TMP16:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
 ; AVX-NEXT:    [[TMP17:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]]
 ; AVX-NEXT:    [[TMP18:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
index 891d5c34170b90..77606b23ca41f4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll
@@ -18,12 +18,12 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; CHECK-NEXT:    [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-; CHECK-NEXT:    [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <8 x i32> [[VEC_IND4]], splat (i32 8)
+; CHECK-NEXT:    [[STEP_ADD6:%.*]] = add <8 x i32> [[STEP_ADD5]], splat (i32 8)
+; CHECK-NEXT:    [[STEP_ADD7:%.*]] = add <8 x i32> [[STEP_ADD6]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr { i32, [8 x i32] }, ptr [[DST]], <8 x i64> [[STEP_ADD1]]
@@ -33,8 +33,8 @@ define i32 @iv_used_widened_and_truncated(ptr %dst, i64 %N) #0 {
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD6]], <8 x ptr> [[TMP3]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[STEP_ADD7]], <8 x ptr> [[TMP4]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8)
+; CHECK-NEXT:    [[VEC_IND_NEXT9]] = add <8 x i32> [[STEP_ADD7]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -88,24 +88,24 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10)
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4
-; CHECK-NEXT:    store <4 x i16> [[TMP2]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
-; CHECK-NEXT:    store <4 x i16> [[TMP3]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]]
+; CHECK-NEXT:    store <4 x i16> [[TMP1]], ptr [[TMP6]], align 2, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]]
+; CHECK-NEXT:    store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META4]], !noalias [[META7]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4
 ; CHECK-NEXT:    store <4 x i32> [[VEC_IND3]], ptr [[TMP10]], align 4, !alias.scope [[META7]]
 ; CHECK-NEXT:    store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META7]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
-; CHECK-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       middle.block:
@@ -158,18 +158,18 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i16> [[VEC_IND]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
-; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10)
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8
 ; CHECK-NEXT:    store <8 x i16> [[TMP5]], ptr [[TMP7]], align 2
 ; CHECK-NEXT:    store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i16> [[STEP_ADD]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       middle.block:
@@ -273,7 +273,7 @@ define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 {
 ; CHECK-NEXT:    [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
-; CHECK-NEXT:    [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0
 ; CHECK-NEXT:    store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope [[META16:![0-9]+]], !noalias [[META13]]
@@ -382,11 +382,11 @@ define i16 @iv_and_step_trunc() {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ <i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16>
 ; CHECK-NEXT:    [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]]
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -441,9 +441,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
@@ -471,7 +471,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
 ; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]])
 ; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
 ; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -571,7 +571,7 @@ define void @wide_iv_trunc(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
@@ -640,6 +640,58 @@ exit:
 }
 
 define void @wombat(i32 %arg, ptr %dst) #1 {
+; CHECK-LABEL: define void @wombat(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[ARG]], 3
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 56, [[ARG]]
+; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT4]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[PHI2]], 12
+; CHECK-NEXT:    store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
+; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
+; CHECK-NEXT:    [[TRUNC]] = trunc i64 [[MUL3]] to i32
+; CHECK-NEXT:    br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   %mul = mul i32 %arg, 3
   %zext = zext i32 %arg to i64
@@ -662,6 +714,59 @@ exit:
 }
 
 define void @wombat2(i32 %arg, ptr %dst) #1 {
+; CHECK-LABEL: define void @wombat2(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[ARG]], 3
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 56, [[ARG]]
+; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT4]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_1:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[PHI2]], 12
+; CHECK-NEXT:    store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
+; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
+; CHECK-NEXT:    [[TRUNC_0:%.*]] = trunc i64 [[MUL3]] to i60
+; CHECK-NEXT:    [[TRUNC_1]] = trunc i60 [[TRUNC_0]] to i32
+; CHECK-NEXT:    br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   %mul = mul i32 %arg, 3
   %zext = zext i32 %arg to i64
@@ -686,6 +791,59 @@ exit:
 
 
 define void @with_dead_use(i32 %arg, ptr %dst) #1 {
+; CHECK-LABEL: define void @with_dead_use(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[DST:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[ARG]], 3
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[ARG]] to i64
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 56, [[ARG]]
+; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[MUL]], [[TMP0]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[MUL]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <8 x i32> poison, i32 [[ARG]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT2]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[DOTSPLAT3]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <8 x i32> [[DOTSPLAT]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[ARG]], 8
+; CHECK-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <8 x i32> poison, i32 [[TMP2]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <8 x i32> [[DOTSPLATINSERT4]], <8 x i32> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 4, [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i32> [[VEC_IND]], splat (i32 12)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], [[DOTSPLAT5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 56
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 60, [[MIDDLE_BLOCK]] ], [ 4, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[MUL]], [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr i32, ptr [[DST]], i64 [[PHI]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[PHI2]], 12
+; CHECK-NEXT:    store i32 [[AND]], ptr [[GETELEMENTPTR]], align 4
+; CHECK-NEXT:    [[MUL3:%.*]] = mul i64 [[PHI]], [[ZEXT]]
+; CHECK-NEXT:    [[ADD]] = add i64 [[PHI]], 1
+; CHECK-NEXT:    [[ICMP:%.*]] = icmp ugt i64 [[PHI]], 65
+; CHECK-NEXT:    [[TRUNC]] = trunc i64 [[MUL3]] to i32
+; CHECK-NEXT:    [[DEAD_AND:%.*]] = and i32 [[TRUNC]], 123
+; CHECK-NEXT:    br i1 [[ICMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
 entry:
   %mul = mul i32 %arg, 3
   %zext = zext i32 %arg to i64
@@ -737,4 +895,10 @@ attributes #1 = { "target-cpu"="skylake-avx512" "target-features"="-avx512f" }
 ; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]}
 ; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
 ; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
+; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
+; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
+; CHECK: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
+; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
+; CHECK: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
+; CHECK: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
index 7d1d326641e124..bd0f2b11271365 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-cost.ll
@@ -495,7 +495,7 @@ define void @geps_feeding_interleave_groups_with_reuse2(ptr %A, ptr %B, i64 %N)
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i32> [[TMP63]], <32 x i32> poison, <32 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28, i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 2, i32 6, i32 10, i32 14, i32 18, i32 22, i32 26, i32 30, i32 3, i32 7, i32 11, i32 15, i32 19, i32 23, i32 27, i32 31>
 ; CHECK-NEXT:    store <32 x i32> [[INTERLEAVED_VEC]], ptr [[TMP56]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP64]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll
index 717dbe359f1050..1f27fc331d0cd7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/interleaved-accesses-sink-store-across-load.ll
@@ -7,8 +7,8 @@ target triple = "x86_64-apple-macos"
 ; that store into the last store (by creating an interleaved store group). This
 ; means the loaded %l2 will have incorrect value.
 define void @avoid_sinking_store_across_load(ptr %arr) {
-; CHECK-LABEL: define void @avoid_sinking_store_across_load
-; CHECK-SAME: (ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-LABEL: define void @avoid_sinking_store_across_load(
+; CHECK-SAME: ptr [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -24,22 +24,22 @@ define void @avoid_sinking_store_across_load(ptr %arr) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <12 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[STRIDED_VEC]], <i32 25, i32 25, i32 25, i32 25>
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP7]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP7]], i32 0
-; CHECK-NEXT:    [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[VEC_IND2]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], <4 x i64> [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[STRIDED_VEC]], splat (i32 25)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP7]], <4 x ptr> [[TMP6]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6]], i32 0
+; CHECK-NEXT:    [[WIDE_VEC4:%.*]] = load <12 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <12 x i32> [[WIDE_VEC4]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
-; CHECK-NEXT:    [[TMP11:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP11]], <4 x ptr> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC6]], [[STRIDED_VEC5]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP4]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 12, i64 12, i64 12, i64 12>
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 12)
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
index 8e934dcfae5510..1433e48690bc60 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll
@@ -31,9 +31,9 @@ define i32 @test_explicit_pred(i64 %len) {
 ; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
@@ -57,7 +57,7 @@ define i32 @test_explicit_pred(i64 %len) {
 ; CHECK-NEXT:    [[TMP18]] = add <4 x i32> [[VEC_PHI5]], [[PREDPHI11]]
 ; CHECK-NEXT:    [[TMP19]] = add <4 x i32> [[VEC_PHI6]], [[PREDPHI12]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
index ecae00807f4e72..1a7d5d5459aa0d 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll
@@ -17,9 +17,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
@@ -47,7 +47,7 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 {
 ; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]])
 ; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
 ; CHECK-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -181,8 +181,8 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
 ; CHECK-NEXT:    [[WIDE_VEC4:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], <i32 123, i32 123, i32 123, i32 123>
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], <i32 123, i32 123, i32 123, i32 123>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123)
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123)
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP10]], i32 4, <4 x i1> [[TMP8]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index d2600cd59b6dfe..01006bb1a2072e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -39,7 +39,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
-; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0
 ; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison)
@@ -102,10 +102,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
-; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]]
 ; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
 ; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8
@@ -185,10 +185,10 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
-; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP2]]
 ; AVX512-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
 ; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16
@@ -227,7 +227,7 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP27]]
 ; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP28]], i32 0
 ; AVX512-NEXT:    [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP29]], align 4
-; AVX512-NEXT:    [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP31:%.*]] = getelementptr i32, ptr [[B]], i64 [[TMP27]]
 ; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr i32, ptr [[TMP31]], i32 0
 ; AVX512-NEXT:    [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison)
@@ -314,7 +314,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc
 ; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 0
 ; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP4]], align 4
-; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP6]], i32 0
 ; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> poison)
@@ -377,10 +377,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc
 ; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP7]], align 4
-; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]]
 ; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0
 ; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8
@@ -460,10 +460,10 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc
 ; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP7]], align 4
-; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP2]]
 ; AVX512-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 0
 ; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16
@@ -502,7 +502,7 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc
 ; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[TMP27]]
 ; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP28]], i32 0
 ; AVX512-NEXT:    [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP29]], align 4
-; AVX512-NEXT:    [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP30:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP31:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[TMP27]]
 ; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP31]], i32 0
 ; AVX512-NEXT:    [[WIDE_MASKED_LOAD13:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) [[TMP32]], i32 4, <8 x i1> [[TMP30]], <8 x i32> poison)
@@ -598,7 +598,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; AVX1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
-; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX1-NEXT:    [[TMP5:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; AVX1-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]]
 ; AVX1-NEXT:    [[TMP7:%.*]] = getelementptr float, ptr [[TMP6]], i32 0
 ; AVX1-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x float> poison)
@@ -663,10 +663,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX2-NEXT:    [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
-; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX2-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX2-NEXT:    [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX2-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]]
 ; AVX2-NEXT:    [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0
 ; AVX2-NEXT:    [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8
@@ -751,10 +751,10 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
 ; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
-; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD5]], splat (i32 100)
+; AVX512-NEXT:    [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX512-NEXT:    [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP2]]
 ; AVX512-NEXT:    [[TMP13:%.*]] = getelementptr float, ptr [[TMP12]], i32 0
 ; AVX512-NEXT:    [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16
@@ -797,7 +797,7 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[TMP31]]
 ; AVX512-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 0
 ; AVX512-NEXT:    [[WIDE_LOAD12:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4
-; AVX512-NEXT:    [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP34:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD12]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP35:%.*]] = getelementptr float, ptr [[B]], i64 [[TMP31]]
 ; AVX512-NEXT:    [[TMP36:%.*]] = getelementptr float, ptr [[TMP35]], i32 0
 ; AVX512-NEXT:    [[WIDE_MASKED_LOAD13:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP36]], i32 4, <8 x i1> [[TMP34]], <8 x float> poison)
@@ -904,10 +904,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]]
 ; AVX-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]]
 ; AVX-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]]
-; AVX-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100>
-; AVX-NEXT:    [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], <i32 100, i32 100, i32 100, i32 100>
+; AVX-NEXT:    [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX-NEXT:    [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100)
+; AVX-NEXT:    [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100)
 ; AVX-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]]
 ; AVX-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0
 ; AVX-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4
@@ -992,10 +992,10 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META11]]
 ; AVX512-NEXT:    [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META11]]
 ; AVX512-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META11]]
-; AVX512-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
+; AVX512-NEXT:    [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
+; AVX512-NEXT:    [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100)
+; AVX512-NEXT:    [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100)
+; AVX512-NEXT:    [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100)
 ; AVX512-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[TMP0]]
 ; AVX512-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i32 0
 ; AVX512-NEXT:    [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8
@@ -1138,8 +1138,8 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; AVX512-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], <8 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP0]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> poison), !alias.scope [[META21:![0-9]+]]
-; AVX512-NEXT:    [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], <i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100, i32 100>
-; AVX512-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; AVX512-NEXT:    [[TMP1:%.*]] = icmp slt <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 100)
+; AVX512-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i64> [[VEC_IND]], splat (i64 1)
 ; AVX512-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[B]], <8 x i64> [[TMP2]]
 ; AVX512-NEXT:    [[WIDE_MASKED_GATHER6:%.*]] = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> [[TMP3]], i32 8, <8 x i1> [[TMP1]], <8 x double> poison), !alias.scope [[META24:![0-9]+]]
 ; AVX512-NEXT:    [[TMP4:%.*]] = sitofp <8 x i32> [[WIDE_MASKED_GATHER]] to <8 x double>
@@ -1147,7 +1147,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
 ; AVX512-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], <8 x i64> [[VEC_IND]]
 ; AVX512-NEXT:    call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> [[TMP5]], <8 x ptr> [[TMP6]], i32 8, <8 x i1> [[TMP1]]), !alias.scope [[META26:![0-9]+]], !noalias [[META28:![0-9]+]]
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128, i64 128>
+; AVX512-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 128)
 ; AVX512-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 624
 ; AVX512-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
 ; AVX512:       middle.block:
@@ -1306,10 +1306,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
 ; AVX2-NEXT:    [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX2-NEXT:    [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP22]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META21]]
 ; AVX2-NEXT:    [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT:    [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX2-NEXT:    [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX2-NEXT:    [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX2-NEXT:    [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX2-NEXT:    [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01)
+; AVX2-NEXT:    [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01)
+; AVX2-NEXT:    [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01)
+; AVX2-NEXT:    [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01)
 ; AVX2-NEXT:    [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]]
 ; AVX2-NEXT:    [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0
 ; AVX2-NEXT:    [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3
@@ -1418,10 +1418,10 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
 ; AVX512-NEXT:    [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; AVX512-NEXT:    [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP22]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META34]]
 ; AVX512-NEXT:    [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT:    [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX512-NEXT:    [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX512-NEXT:    [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
-; AVX512-NEXT:    [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
+; AVX512-NEXT:    [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01)
+; AVX512-NEXT:    [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01)
+; AVX512-NEXT:    [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01)
+; AVX512-NEXT:    [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01)
 ; AVX512-NEXT:    [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[TMP0]]
 ; AVX512-NEXT:    [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0
 ; AVX512-NEXT:    [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7
@@ -1527,10 +1527,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX1-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; AVX1-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
 ; AVX1-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; AVX1-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX1-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX1-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX1-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX1-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer
 ; AVX1-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer
 ; AVX1-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer
@@ -1565,10 +1565,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX1-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
 ; AVX1-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX1-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
 ; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX1-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX1-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
@@ -1627,10 +1627,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; AVX2-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
 ; AVX2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; AVX2-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX2-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX2-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX2-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX2-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer
 ; AVX2-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer
 ; AVX2-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer
@@ -1665,10 +1665,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX2-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
 ; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX2-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
 ; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX2-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX2-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
@@ -1727,10 +1727,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX512-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
 ; AVX512-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1
 ; AVX512-NEXT:    [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
-; AVX512-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AVX512-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX512-NEXT:    [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX512-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX512-NEXT:    [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX512-NEXT:    [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer
 ; AVX512-NEXT:    [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer
 ; AVX512-NEXT:    [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer
@@ -1765,10 +1765,10 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX512-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX512-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16
 ; AVX512-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; AVX512-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX512-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
@@ -1872,10 +1872,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX1-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; AVX1-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
 ; AVX1-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; AVX1-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1>
-; AVX1-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1>
+; AVX1-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX1-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX1-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX1-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX1-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer
 ; AVX1-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer
 ; AVX1-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer
@@ -1910,10 +1910,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX1-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
 ; AVX1-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX1-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
-; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
+; AVX1-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
 ; AVX1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX1-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX1-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -1972,10 +1972,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX2-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; AVX2-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
 ; AVX2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; AVX2-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1>
-; AVX2-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1>
+; AVX2-NEXT:    [[TMP6:%.*]] = and <4 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX2-NEXT:    [[TMP7:%.*]] = and <4 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX2-NEXT:    [[TMP8:%.*]] = and <4 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX2-NEXT:    [[TMP9:%.*]] = and <4 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX2-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i8> [[TMP6]], zeroinitializer
 ; AVX2-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i8> [[TMP7]], zeroinitializer
 ; AVX2-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i8> [[TMP8]], zeroinitializer
@@ -2010,10 +2010,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX2-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
 ; AVX2-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX2-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
-; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <4 x i1> [[TMP31]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <4 x i1> [[TMP32]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <4 x i1> [[TMP33]])
+; AVX2-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <4 x i1> [[TMP34]])
 ; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; AVX2-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX2-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
@@ -2072,10 +2072,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX512-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
 ; AVX512-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1
 ; AVX512-NEXT:    [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
-; AVX512-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; AVX512-NEXT:    [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AVX512-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[WIDE_LOAD]], splat (i8 1)
+; AVX512-NEXT:    [[TMP7:%.*]] = and <8 x i8> [[WIDE_LOAD1]], splat (i8 1)
+; AVX512-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[WIDE_LOAD2]], splat (i8 1)
+; AVX512-NEXT:    [[TMP9:%.*]] = and <8 x i8> [[WIDE_LOAD3]], splat (i8 1)
 ; AVX512-NEXT:    [[TMP10:%.*]] = icmp eq <8 x i8> [[TMP6]], zeroinitializer
 ; AVX512-NEXT:    [[TMP11:%.*]] = icmp eq <8 x i8> [[TMP7]], zeroinitializer
 ; AVX512-NEXT:    [[TMP12:%.*]] = icmp eq <8 x i8> [[TMP8]], zeroinitializer
@@ -2110,10 +2110,10 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
 ; AVX512-NEXT:    [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
 ; AVX512-NEXT:    [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16
 ; AVX512-NEXT:    [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]])
-; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>, ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP36]], i32 8, <8 x i1> [[TMP31]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP37]], i32 8, <8 x i1> [[TMP32]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP38]], i32 8, <8 x i1> [[TMP33]])
+; AVX512-NEXT:    call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr [[TMP39]], i32 8, <8 x i1> [[TMP34]])
 ; AVX512-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; AVX512-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX512-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
index 3226f72d51d2e4..6c89fa9d9d0476 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/optsize.ll
@@ -21,12 +21,12 @@ define i32 @foo_optsize() #0 {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1)
 ; CHECK-NEXT:    call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 64
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -60,12 +60,12 @@ define i32 @foo_optsize() #0 {
 ; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0
 ; AUTOVF-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT:    [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
+; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202)
 ; AUTOVF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
 ; AUTOVF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; AUTOVF-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison)
 ; AUTOVF-NEXT:    [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
-; AUTOVF-NEXT:    [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AUTOVF-NEXT:    [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1)
 ; AUTOVF-NEXT:    call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]])
 ; AUTOVF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 32
 ; AUTOVF-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
@@ -121,12 +121,12 @@ define i32 @foo_minsize() #1 {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <64 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <64 x i32> [[BROADCAST_SPLATINSERT]], <64 x i32> poison, <64 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <64 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <64 x i32> [[VEC_IV]], splat (i32 202)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]], <64 x i8> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <64 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <64 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = select <64 x i1> [[TMP4]], <64 x i8> splat (i8 2), <64 x i8> splat (i8 1)
 ; CHECK-NEXT:    call void @llvm.masked.store.v64i8.p0(<64 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <64 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 64
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
@@ -160,12 +160,12 @@ define i32 @foo_minsize() #1 {
 ; AUTOVF-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[INDEX]], i64 0
 ; AUTOVF-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
 ; AUTOVF-NEXT:    [[VEC_IV:%.*]] = add <32 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], <i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202, i32 202>
+; AUTOVF-NEXT:    [[TMP1:%.*]] = icmp ule <32 x i32> [[VEC_IV]], splat (i32 202)
 ; AUTOVF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
 ; AUTOVF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; AUTOVF-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]], <32 x i8> poison)
 ; AUTOVF-NEXT:    [[TMP4:%.*]] = icmp eq <32 x i8> [[WIDE_MASKED_LOAD]], zeroinitializer
-; AUTOVF-NEXT:    [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AUTOVF-NEXT:    [[TMP5:%.*]] = select <32 x i1> [[TMP4]], <32 x i8> splat (i8 2), <32 x i8> splat (i8 1)
 ; AUTOVF-NEXT:    call void @llvm.masked.store.v32i8.p0(<32 x i8> [[TMP5]], ptr [[TMP3]], i32 1, <32 x i1> [[TMP1]])
 ; AUTOVF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 32
 ; AUTOVF-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 224
@@ -230,7 +230,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    store <64 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 64
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <64 x i32> [[VEC_IND]], splat (i32 64)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -269,7 +269,7 @@ define void @scev4stride1(ptr noalias nocapture %a, ptr noalias nocapture readon
 ; AUTOVF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; AUTOVF-NEXT:    store <8 x i32> [[WIDE_MASKED_GATHER]], ptr [[TMP4]], align 4
 ; AUTOVF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; AUTOVF-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; AUTOVF-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; AUTOVF-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; AUTOVF-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; AUTOVF:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
index fad9a87e5a01d7..ee8374f952c7a3 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -26,12 +26,12 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 3
-; CHECK-NEXT:    store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[PTR_2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
-; CHECK-NEXT:    store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope !3
+; CHECK-NEXT:    store <4 x i64> [[VEC_IND]], ptr [[TMP7]], align 8, !alias.scope [[META3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
@@ -53,7 +53,7 @@ define void @foo(ptr %ptr, ptr %ptr.2) {
 ; CHECK-NEXT:    [[TMP12]] = add nuw nsw i64 [[TMP10]], 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], 80
 ; CHECK-NEXT:    [[CAN_IV_NEXT]] = add nuw nsw i64 [[CAN_IV]], 1
-; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 5aac001a8b9cf1..44ff7c6db205d0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -50,9 +50,9 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 0
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0
@@ -70,7 +70,7 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP24]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !tbaa [[TBAA10]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> [[TMP25]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !tbaa [[TBAA10]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
index cc820f2a8f2a05..e2243fb6292970 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr55096-scalarize-add.ll
@@ -37,9 +37,9 @@ define void @test_pr55096(i64 %c, ptr %p) {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE3]]
 ; CHECK:       pred.store.continue3:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 340
-; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP15]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
index b88f413ff1b81c..df9ba2194139b8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
@@ -75,10 +75,10 @@ define void @test(ptr %p) {
 ; VEC-NEXT:    store i64 0, ptr [[TMP24]], align 8
 ; VEC-NEXT:    store i64 0, ptr [[TMP25]], align 8
 ; VEC-NEXT:    store i64 0, ptr [[TMP26]], align 8
-; VEC-NEXT:    [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
+; VEC-NEXT:    [[TMP27:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
 ; VEC-NEXT:    [[TMP28]] = zext <4 x i16> [[TMP27]] to <4 x i64>
 ; VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VEC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; VEC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; VEC-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; VEC-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; VEC:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index 3f38abc75a5837..73bdae71654f12 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -25,8 +25,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], splat (i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP0]], 1
@@ -34,13 +34,13 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT:    br i1 true, label [[BB6:%.*]], label [[SCALAR_PH]], !prof [[PROF5:![0-9]+]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB5:%.*]] ]
 ; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
@@ -48,7 +48,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[IV]], 1
 ; CHECK-NEXT:    [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]]
 ; CHECK:       bb18:
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[IV]], 1
 ; CHECK-NEXT:    [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]]
@@ -57,7 +57,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
 ; CHECK:       loop.latch:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
 ; CHECK-NEXT:    [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
-; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       bb6:
 ; CHECK-NEXT:    ret void
 ;
@@ -101,7 +101,8 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
 ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
 ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
 ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1}
-; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]}
+; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 3}
+; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]}
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
index dc9c3e27fe08f2..2a6eac3b53dfa9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
@@ -27,10 +27,10 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
 ; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
 ; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
 ; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
-; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
+; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
 ; COST-NEXT:    [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
 ; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; COST-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; COST-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -83,14 +83,14 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
 ; FORCED-NEXT:    [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -166,12 +166,12 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr
 ; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
 ; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
 ; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
-; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
+; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
 ; COST-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
 ; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
 ; COST-NEXT:    [[TMP11:%.*]] = or <4 x i1> [[TMP10]], [[TMP7]]
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
 ; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; COST-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; COST-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -225,18 +225,18 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP9]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    [[TMP14:%.*]] = xor <4 x i1> [[TMP10]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP9]]
 ; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP10]]
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -350,10 +350,10 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
 ; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
 ; FORCED-NEXT:    [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
@@ -362,14 +362,14 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP14]]
 ; FORCED-NEXT:    [[TMP19:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP13]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP14]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP19]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP20]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -468,17 +468,17 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
 ; COST-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]]
 ; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
 ; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
-; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
+; COST-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
 ; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
 ; COST-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
 ; COST-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
 ; COST-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
 ; COST-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP6]], i32 1, <4 x i1> [[TMP13]])
 ; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
 ; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP7]])
 ; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; COST-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; COST-NEXT:    br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -544,10 +544,10 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
 ; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
 ; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP9]], <i1 true, i1 true, i1 true, i1 true>
@@ -558,14 +558,14 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
 ; FORCED-NEXT:    [[TMP20:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP21:%.*]] = select <4 x i1> [[TMP19]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP22:%.*]] = select <4 x i1> [[TMP20]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP7]], i32 1, <4 x i1> [[TMP21]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[TMP8]], i32 1, <4 x i1> [[TMP22]])
 ; FORCED-NEXT:    [[TMP23:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP9]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP10]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -706,16 +706,16 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
+; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
 ; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
 ; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], zeroinitializer
-; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 14, i64 14, i64 14, i64 14>
-; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 14, i64 14, i64 14, i64 14>
-; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 15, i64 15, i64 15, i64 15>
-; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 15, i64 15, i64 15, i64 15>
+; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
+; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
+; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 14)
+; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
+; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 15)
 ; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP23]], [[TMP25]]
 ; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP24]], [[TMP26]]
 ; FORCED-NEXT:    [[TMP21:%.*]] = or <4 x i1> [[TMP13]], [[TMP17]]
@@ -728,10 +728,10 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP41]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -857,20 +857,20 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP17:%.*]] = or <4 x i1> [[TMP15]], [[TMP11]]
 ; FORCED-NEXT:    [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP12]]
 ; FORCED-NEXT:    [[TMP20:%.*]] = xor <4 x i1> [[TMP17]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    [[TMP21:%.*]] = xor <4 x i1> [[TMP18]], <i1 true, i1 true, i1 true, i1 true>
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP11]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -965,16 +965,16 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
 ; COST-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
 ; COST-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
 ; COST-NEXT:    [[TMP7:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
+; COST-NEXT:    [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; COST-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
 ; COST-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
 ; COST-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer
 ; COST-NEXT:    [[TMP12:%.*]] = or <4 x i1> [[TMP10]], [[TMP11]]
 ; COST-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP12]], <i1 true, i1 true, i1 true, i1 true>
 ; COST-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
 ; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
-; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
+; COST-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
 ; COST-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; COST-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; COST-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -1041,10 +1041,10 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
 ; FORCED-NEXT:    [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; FORCED-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], [[BROADCAST_SPLAT]]
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP15:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP16:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
@@ -1057,10 +1057,10 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
 ; FORCED-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP22]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP25]])
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -1194,10 +1194,10 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
-; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
-; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 -12)
+; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
+; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 13)
 ; FORCED-NEXT:    [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP25]]
 ; FORCED-NEXT:    [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP26]]
 ; FORCED-NEXT:    [[TMP15:%.*]] = xor <4 x i1> [[TMP13]], <i1 true, i1 true, i1 true, i1 true>
@@ -1214,12 +1214,12 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
 ; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP30]])
 ; FORCED-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP17]], <4 x i1> zeroinitializer
 ; FORCED-NEXT:    [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
 ; FORCED-NEXT:    [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
 ; FORCED-NEXT:    [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -1348,24 +1348,24 @@ define void @large_number_of_cases(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
 ; FORCED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 1
 ; FORCED-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
-; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 1, i64 1, i64 1, i64 1>
-; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 1, i64 1, i64 1, i64 1>
-; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 3, i64 3, i64 3, i64 3>
-; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 3, i64 3, i64 3, i64 3>
-; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 11, i64 11, i64 11, i64 11>
-; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 11, i64 11, i64 11, i64 11>
-; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 99, i64 99, i64 99, i64 99>
-; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 99, i64 99, i64 99, i64 99>
-; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 213, i64 213, i64 213, i64 213>
-; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 213, i64 213, i64 213, i64 213>
-; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 238, i64 238, i64 238, i64 238>
-; FORCED-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 238, i64 238, i64 238, i64 238>
-; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 513, i64 513, i64 513, i64 513>
-; FORCED-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 513, i64 513, i64 513, i64 513>
-; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 791, i64 791, i64 791, i64 791>
-; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 791, i64 791, i64 791, i64 791>
-; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 899, i64 899, i64 899, i64 899>
-; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 899, i64 899, i64 899, i64 899>
+; FORCED-NEXT:    [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1)
+; FORCED-NEXT:    [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 1)
+; FORCED-NEXT:    [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 3)
+; FORCED-NEXT:    [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
+; FORCED-NEXT:    [[TMP13:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 11)
+; FORCED-NEXT:    [[TMP14:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 11)
+; FORCED-NEXT:    [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 99)
+; FORCED-NEXT:    [[TMP16:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 99)
+; FORCED-NEXT:    [[TMP17:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 213)
+; FORCED-NEXT:    [[TMP18:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 213)
+; FORCED-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 238)
+; FORCED-NEXT:    [[TMP20:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 238)
+; FORCED-NEXT:    [[TMP21:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 513)
+; FORCED-NEXT:    [[TMP22:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 513)
+; FORCED-NEXT:    [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 791)
+; FORCED-NEXT:    [[TMP24:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 791)
+; FORCED-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 899)
+; FORCED-NEXT:    [[TMP26:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], splat (i64 899)
 ; FORCED-NEXT:    [[TMP27:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
 ; FORCED-NEXT:    [[TMP28:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
 ; FORCED-NEXT:    [[TMP29:%.*]] = or <4 x i1> [[TMP27]], [[TMP13]]
@@ -1382,8 +1382,8 @@ define void @large_number_of_cases(ptr %start, ptr %end) {
 ; FORCED-NEXT:    [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
 ; FORCED-NEXT:    [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
 ; FORCED-NEXT:    [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]])
-; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]])
+; FORCED-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
 ; FORCED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FORCED-NEXT:    [[TMP59:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; FORCED-NEXT:    br i1 [[TMP59]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
index 9cd990b9878ce6..95c74d19dd2dbc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -124,8 +124,8 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
@@ -191,8 +191,8 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr float, ptr [[TMP2]], i32 0
@@ -265,8 +265,8 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
@@ -348,8 +348,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index ce460f4fe35425..f2cee8cdd2656b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -15,23 +15,253 @@ target triple = "x86_64-apple-macosx10.11.0"
 ; Function Attrs: norecurse nounwind ssp uwtable
 define void @_Z3fn1v() #0 {
 ; CHECK-LABEL: @_Z3fn1v(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @c, align 4
+; CHECK-NEXT:    [[CMP34:%.*]] = icmp sgt i32 [[TMP0]], 8
+; CHECK-NEXT:    br i1 [[CMP34]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.body.lr.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @a, align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @b, align 8
+; CHECK-NEXT:    [[MUL:%.*]] = mul i64 [[TMP2]], 4063299859190
+; CHECK-NEXT:    [[TOBOOL6:%.*]] = icmp eq i64 [[MUL]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP0]] to i64
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[ITER_CHECK27:%.*]], label [[ITER_CHECK:%.*]]
+; CHECK:       iter.check:
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw i64 [[TMP3]], -9
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = add nuw i64 [[TMP5]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP6]], 16
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 16
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END:%.*]] = add i64 8, [[TMP7]]
+; CHECK-NEXT:    [[IND_END4:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, %vector.ph ], [ [[VEC_IND_NEXT4:%.*]], %vector.body ]
-; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32)
+; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32)
+; CHECK-NEXT:    [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP63]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT99:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[IND_END12:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-NEXT:    [[TMP64:%.*]] = mul i64 [[N_VEC]], 2
+; CHECK-NEXT:    [[IND_END9:%.*]] = add i64 8, [[TMP64]]
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP6]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF6:%.*]] = urem i64 [[TMP6]], 8
+; CHECK-NEXT:    [[N_VEC7:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF6]]
+; CHECK-NEXT:    [[TMP17:%.*]] = mul i64 [[N_VEC7]], 2
+; CHECK-NEXT:    [[IND_END8:%.*]] = add i64 8, [[TMP17]]
+; CHECK-NEXT:    [[IND_END11:%.*]] = mul i64 [[N_VEC7]], 2
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <8 x i64> [[DOTSPLAT]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT:    [[DOTSPLATINSERT17:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL5]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT18:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT17]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION19:%.*]] = add <8 x i64> [[DOTSPLAT18]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX14:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <8 x i64> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND20:%.*]] = phi <8 x i64> [ [[INDUCTION19]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT21:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND15]]
+; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND15]]
+; CHECK-NEXT:    [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP20]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP21]], i32 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[TMP22:%.*]] = or disjoint <8 x i64> [[VEC_IND20]], splat (i64 1)
+; CHECK-NEXT:    [[TMP23:%.*]] = add nsw <8 x i64> [[TMP18]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP24]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16)
+; CHECK-NEXT:    [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16)
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[CMP_N23]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL10:%.*]] = phi i64 [ [[IND_END8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], [[VEC_EPILOG_ITER_CHECK]] ], [ 8, [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL13:%.*]] = phi i64 [ [[IND_END11]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       iter.check27:
+; CHECK-NEXT:    [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9
+; CHECK-NEXT:    [[TMP27:%.*]] = lshr i64 [[TMP26]], 1
+; CHECK-NEXT:    [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK25]], label [[VEC_EPILOG_SCALAR_PH46:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK29:%.*]]
+; CHECK:       vector.main.loop.iter.check29:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK28]], label [[VEC_EPILOG_PH47:%.*]], label [[VECTOR_PH30:%.*]]
+; CHECK:       vector.ph30:
+; CHECK-NEXT:    [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16
+; CHECK-NEXT:    [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i1> [[BROADCAST_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP29:%.*]] = mul i64 [[N_VEC32]], 2
+; CHECK-NEXT:    [[IND_END41:%.*]] = add i64 8, [[TMP29]]
+; CHECK-NEXT:    [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2
+; CHECK-NEXT:    br label [[VECTOR_BODY33:%.*]]
+; CHECK:       vector.body33:
+; CHECK-NEXT:    [[INDEX34:%.*]] = phi i64 [ 0, [[VECTOR_PH30]] ], [ [[INDEX_NEXT39:%.*]], [[VECTOR_BODY33]] ]
+; CHECK-NEXT:    [[VEC_IND35:%.*]] = phi <16 x i64> [ <i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30, i64 32, i64 34, i64 36, i64 38>, [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT36:%.*]], [[VECTOR_BODY33]] ]
+; CHECK-NEXT:    [[VEC_IND37:%.*]] = phi <16 x i64> [ <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14, i64 16, i64 18, i64 20, i64 22, i64 24, i64 26, i64 28, i64 30>, [[VECTOR_PH30]] ], [ [[VEC_IND_NEXT38:%.*]], [[VECTOR_BODY33]] ]
+; CHECK-NEXT:    [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]]
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]]
+; CHECK-NEXT:    [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]]
+; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP32]], i64 0
+; CHECK-NEXT:    [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[TMP34]])
+; CHECK-NEXT:    [[TMP35:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1)
+; CHECK-NEXT:    [[TMP36:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP35]]
+; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP36]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> [[TMP37]], i32 8, <16 x i1> [[TMP34]])
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP33]], i32 16, <16 x i1> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[TMP38:%.*]] = or disjoint <16 x i64> [[VEC_IND37]], splat (i64 1)
+; CHECK-NEXT:    [[TMP39:%.*]] = add nsw <16 x i64> [[TMP30]], [[TMP38]]
+; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP31]], <16 x i64> [[TMP39]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 7), <16 x ptr> [[TMP40]], i32 8, <16 x i1> [[BROADCAST_SPLAT]])
+; CHECK-NEXT:    [[INDEX_NEXT39]] = add nuw i64 [[INDEX34]], 16
+; CHECK-NEXT:    [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32)
+; CHECK-NEXT:    [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32)
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]]
+; CHECK-NEXT:    br i1 [[TMP41]], label [[MIDDLE_BLOCK24:%.*]], label [[VECTOR_BODY33]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block24:
+; CHECK-NEXT:    [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]]
+; CHECK-NEXT:    br i1 [[CMP_N40]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK48:%.*]]
+; CHECK:       vec.epilog.iter.check48:
+; CHECK-NEXT:    [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2
+; CHECK-NEXT:    [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2
+; CHECK-NEXT:    [[IND_END55:%.*]] = add i64 8, [[TMP42]]
+; CHECK-NEXT:    [[N_VEC_REMAINING49:%.*]] = sub i64 [[TMP28]], [[N_VEC32]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_VEC_REMAINING49]], 8
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK50]], label [[VEC_EPILOG_SCALAR_PH46]], label [[VEC_EPILOG_PH47]]
+; CHECK:       vec.epilog.ph47:
+; CHECK-NEXT:    [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL51:%.*]] = phi i64 [ [[N_VEC32]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK29]] ]
+; CHECK-NEXT:    [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8
+; CHECK-NEXT:    [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]]
+; CHECK-NEXT:    [[TMP43:%.*]] = mul i64 [[N_VEC53]], 2
+; CHECK-NEXT:    [[IND_END54:%.*]] = add i64 8, [[TMP43]]
+; CHECK-NEXT:    [[IND_END57:%.*]] = mul i64 [[N_VEC53]], 2
+; CHECK-NEXT:    [[DOTSPLATINSERT62:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL42]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT63:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT62]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION64:%.*]] = add <8 x i64> [[DOTSPLAT63]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT:    [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], <i64 0, i64 2, i64 4, i64 6, i64 8, i64 10, i64 12, i64 14>
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT72:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT73:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT72]], <8 x i1> poison, <8 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY60:%.*]]
+; CHECK:       vec.epilog.vector.body60:
+; CHECK-NEXT:    [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL51]], [[VEC_EPILOG_PH47]] ], [ [[INDEX_NEXT74:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ]
+; CHECK-NEXT:    [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT66:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ]
+; CHECK-NEXT:    [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], [[VEC_EPILOG_PH47]] ], [ [[VEC_IND_NEXT71:%.*]], [[VEC_EPILOG_VECTOR_BODY60]] ]
+; CHECK-NEXT:    [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]]
+; CHECK-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]]
+; CHECK-NEXT:    [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]]
+; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP46]], i64 0
+; CHECK-NEXT:    [[TMP48:%.*]] = xor <8 x i1> [[BROADCAST_SPLAT73]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[TMP48]])
+; CHECK-NEXT:    [[TMP49:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1)
+; CHECK-NEXT:    [[TMP50:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP49]]
+; CHECK-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP50]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> [[TMP51]], i32 8, <8 x i1> [[TMP48]])
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP47]], i32 16, <8 x i1> [[BROADCAST_SPLAT73]])
+; CHECK-NEXT:    [[TMP52:%.*]] = or disjoint <8 x i64> [[VEC_IND70]], splat (i64 1)
+; CHECK-NEXT:    [[TMP53:%.*]] = add nsw <8 x i64> [[TMP44]], [[TMP52]]
+; CHECK-NEXT:    [[TMP54:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP45]], <8 x i64> [[TMP53]], i64 0
+; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 7), <8 x ptr> [[TMP54]], i32 8, <8 x i1> [[BROADCAST_SPLAT73]])
+; CHECK-NEXT:    [[INDEX_NEXT74]] = add nuw i64 [[INDEX61]], 8
+; CHECK-NEXT:    [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16)
+; CHECK-NEXT:    [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16)
+; CHECK-NEXT:    [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]]
+; CHECK-NEXT:    br i1 [[TMP55]], label [[VEC_EPILOG_MIDDLE_BLOCK45:%.*]], label [[VEC_EPILOG_VECTOR_BODY60]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       vec.epilog.middle.block45:
+; CHECK-NEXT:    [[CMP_N75:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]]
+; CHECK-NEXT:    br i1 [[CMP_N75]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH46]]
+; CHECK:       vec.epilog.scalar.ph46:
+; CHECK-NEXT:    [[BC_RESUME_VAL56:%.*]] = phi i64 [ [[IND_END54]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END55]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 8, [[ITER_CHECK27]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL59:%.*]] = phi i64 [ [[IND_END57]], [[VEC_EPILOG_MIDDLE_BLOCK45]] ], [ [[IND_END58]], [[VEC_EPILOG_ITER_CHECK48]] ], [ 0, [[ITER_CHECK27]] ]
+; CHECK-NEXT:    br label [[FOR_BODY_US:%.*]]
+; CHECK:       for.body.us:
+; CHECK-NEXT:    [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US:%.*]] ], [ [[BC_RESUME_VAL56]], [[VEC_EPILOG_SCALAR_PH46]] ]
+; CHECK-NEXT:    [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], [[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL59]], [[VEC_EPILOG_SCALAR_PH46]] ]
+; CHECK-NEXT:    [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]]
+; CHECK-NEXT:    [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]]
+; CHECK-NEXT:    [[TMP57:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV70]]
+; CHECK-NEXT:    [[ARRAYDECAY_US_US_US:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP57]], i64 0
+; CHECK-NEXT:    br i1 [[TOBOOL6]], label [[FOR_BODY5_US_US_US_PREHEADER:%.*]], label [[FOR_BODY5_US_US48_PREHEADER:%.*]]
+; CHECK:       for.body5.us.us48.preheader:
+; CHECK-NEXT:    store i32 8, ptr [[ARRAYDECAY_US_US_US]], align 16
+; CHECK-NEXT:    [[INDVARS_IV_NEXT66:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1
+; CHECK-NEXT:    [[TMP58:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT66]]
+; CHECK-NEXT:    [[ARRAYDECAY_US_US55_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP58]], i64 0
+; CHECK-NEXT:    store i32 8, ptr [[ARRAYDECAY_US_US55_1]], align 8
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]]
+; CHECK:       for.body5.us.us.us.preheader:
+; CHECK-NEXT:    store i32 7, ptr [[ARRAYDECAY_US_US_US]], align 16
+; CHECK-NEXT:    [[INDVARS_IV_NEXT73:%.*]] = or disjoint i64 [[INDVARS_IV70]], 1
+; CHECK-NEXT:    [[TMP59:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV_NEXT73]]
+; CHECK-NEXT:    [[ARRAYDECAY_US_US_US_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR_US]], i64 [[TMP59]], i64 0
+; CHECK-NEXT:    store i32 7, ptr [[ARRAYDECAY_US_US_US_1]], align 8
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP4_US_LCSSA_US_US]]
+; CHECK:       for.cond.cleanup4.us-lcssa.us.us:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT79]] = add nuw nsw i64 [[INDVARS_IV78]], 2
+; CHECK-NEXT:    [[CMP_US:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT79]], [[TMP3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT71]] = add nuw nsw i64 [[INDVARS_IV70]], 2
+; CHECK-NEXT:    br i1 [[CMP_US]], label [[FOR_BODY_US]], label [[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       for.cond.cleanup.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup.loopexit99:
+; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV95:%.*]] = phi i64 [ [[INDVARS_IV_NEXT96:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL10]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[INDVARS_IV87:%.*]] = phi i64 [ [[INDVARS_IV_NEXT88:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL13]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[TMP60:%.*]] = sub nsw i64 8, [[INDVARS_IV95]]
+; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV95]]
+; CHECK-NEXT:    [[TMP61:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV87]]
+; CHECK-NEXT:    [[ARRAYDECAY_US31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP61]], i64 0
+; CHECK-NEXT:    store i32 8, ptr [[ARRAYDECAY_US31]], align 16
+; CHECK-NEXT:    [[INDVARS_IV_NEXT90:%.*]] = or disjoint i64 [[INDVARS_IV87]], 1
+; CHECK-NEXT:    [[TMP62:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV_NEXT90]]
+; CHECK-NEXT:    [[ARRAYDECAY_US31_1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ADD_PTR]], i64 [[TMP62]], i64 0
+; CHECK-NEXT:    store i32 8, ptr [[ARRAYDECAY_US31_1]], align 8
+; CHECK-NEXT:    [[INDVARS_IV_NEXT96]] = add nuw nsw i64 [[INDVARS_IV95]], 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT96]], [[TMP3]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT88]] = add nuw nsw i64 [[INDVARS_IV87]], 2
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT99]], !llvm.loop [[LOOP7:![0-9]+]]
 ;
 entry:
   %0 = load i32, ptr @c, align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
index dc474fbf67ce8b..99aaecf9a073c4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/small-size.ll
@@ -118,7 +118,7 @@ define void @example2(i32 %n, i32 %x) optsize {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -410,7 +410,7 @@ define void @example23b(ptr noalias nocapture %src, ptr noalias nocapture %dst)
 ; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[OFFSET_IDX4]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[NEXT_GEP]], align 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw <4 x i32> [[TMP1]], splat (i32 7)
 ; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr [[NEXT_GEP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -457,7 +457,7 @@ define void @example23c(ptr noalias nocapture %src, ptr noalias nocapture %dst)
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = or disjoint <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[VEC_IV]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index 4e9b97ef928b13..9c42c3769ec177 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -164,10 +164,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
 ; CHECK-NEXT:    [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]]
 ; CHECK-NEXT:    [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]]
 ; CHECK-NEXT:    [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]]
-; CHECK-NEXT:    [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4)
+; CHECK-NEXT:    [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4)
+; CHECK-NEXT:    [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4)
+; CHECK-NEXT:    [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]]
 ; CHECK-NEXT:    [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]]
 ; CHECK-NEXT:    [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]]
@@ -356,10 +356,10 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
 ; MAX-BW-NEXT:    [[TMP137:%.*]] = mul nsw <8 x i32> [[TMP103]], [[WIDE_LOAD4]]
 ; MAX-BW-NEXT:    [[TMP138:%.*]] = mul nsw <8 x i32> [[TMP119]], [[WIDE_LOAD5]]
 ; MAX-BW-NEXT:    [[TMP139:%.*]] = mul nsw <8 x i32> [[TMP135]], [[WIDE_LOAD6]]
-; MAX-BW-NEXT:    [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; MAX-BW-NEXT:    [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; MAX-BW-NEXT:    [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
-; MAX-BW-NEXT:    [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
+; MAX-BW-NEXT:    [[TMP140:%.*]] = add <8 x i32> [[VEC_PHI]], splat (i32 4)
+; MAX-BW-NEXT:    [[TMP141:%.*]] = add <8 x i32> [[VEC_PHI1]], splat (i32 4)
+; MAX-BW-NEXT:    [[TMP142:%.*]] = add <8 x i32> [[VEC_PHI2]], splat (i32 4)
+; MAX-BW-NEXT:    [[TMP143:%.*]] = add <8 x i32> [[VEC_PHI3]], splat (i32 4)
 ; MAX-BW-NEXT:    [[TMP144]] = add <8 x i32> [[TMP140]], [[TMP136]]
 ; MAX-BW-NEXT:    [[TMP145]] = add <8 x i32> [[TMP141]], [[TMP137]]
 ; MAX-BW-NEXT:    [[TMP146]] = add <8 x i32> [[TMP142]], [[TMP138]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
index 68c0b8cbc2c4a9..de906598775eb0 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll
@@ -17,7 +17,7 @@ define dso_local void @tail_folding_enabled(ptr noalias nocapture %A, ptr noalia
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
@@ -85,7 +85,7 @@ define dso_local void @tail_folding_disabled(ptr noalias nocapture %A, ptr noali
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429, i64 429>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 429)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x i32> poison)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
index fa0d529116d109..45594b03353361 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_mem_op.ll
@@ -291,8 +291,8 @@ define void @uniform_copy(ptr %A, ptr %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope !12
-; CHECK-NEXT:    store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope !15, !noalias !12
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META12:![0-9]+]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[B]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META12]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
@@ -349,9 +349,9 @@ define i32 @test_count_bits(ptr %test_base) {
 ; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 8
@@ -376,10 +376,10 @@ define i32 @test_count_bits(ptr %test_base) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x i8> poison, i8 [[TMP15]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT11]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD1]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD2]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[TMP16:%.*]] = urem <4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[TMP17:%.*]] = urem <4 x i64> [[STEP_ADD]], splat (i64 8)
+; CHECK-NEXT:    [[TMP18:%.*]] = urem <4 x i64> [[STEP_ADD_2]], splat (i64 8)
+; CHECK-NEXT:    [[TMP19:%.*]] = urem <4 x i64> [[STEP_ADD_3]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP20:%.*]] = trunc <4 x i64> [[TMP16]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP21:%.*]] = trunc <4 x i64> [[TMP17]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP22:%.*]] = trunc <4 x i64> [[TMP18]] to <4 x i8>
@@ -388,10 +388,10 @@ define i32 @test_count_bits(ptr %test_base) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT8]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT10]], [[TMP22]]
 ; CHECK-NEXT:    [[TMP27:%.*]] = lshr <4 x i8> [[BROADCAST_SPLAT12]], [[TMP23]]
-; CHECK-NEXT:    [[TMP28:%.*]] = and <4 x i8> [[TMP24]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP29:%.*]] = and <4 x i8> [[TMP25]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP30:%.*]] = and <4 x i8> [[TMP26]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP31:%.*]] = and <4 x i8> [[TMP27]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP28:%.*]] = and <4 x i8> [[TMP24]], splat (i8 1)
+; CHECK-NEXT:    [[TMP29:%.*]] = and <4 x i8> [[TMP25]], splat (i8 1)
+; CHECK-NEXT:    [[TMP30:%.*]] = and <4 x i8> [[TMP26]], splat (i8 1)
+; CHECK-NEXT:    [[TMP31:%.*]] = and <4 x i8> [[TMP27]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP32:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP33:%.*]] = zext <4 x i8> [[TMP29]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP34:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32>
@@ -401,7 +401,7 @@ define i32 @test_count_bits(ptr %test_base) {
 ; CHECK-NEXT:    [[TMP38]] = add <4 x i32> [[VEC_PHI5]], [[TMP34]]
 ; CHECK-NEXT:    [[TMP39]] = add <4 x i32> [[VEC_PHI6]], [[TMP35]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
 ; CHECK-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index 094f80e287f0b2..9feeee004025c9 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -114,7 +114,7 @@ define void @vectorized1(ptr noalias nocapture %A, ptr noalias nocapture readonl
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19, i64 19>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 19)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[TMP3]], i32 4, <8 x i1> [[TMP1]], <8 x float> poison), !llvm.access.group [[ACC_GRP6:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
index c6a87fa21fb0cc..3af3ad9ab3cf5b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-interleaved-accesses-gap.ll
@@ -16,7 +16,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], <i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000, i64 10000>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IV]], splat (i64 10000)
 ; CHECK-NEXT:    [[TMP2:%.*]] = mul nuw i64 [[TMP0]], 6
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[B:%.*]], align 1, !llvm.access.group [[ACC_GRP0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
@@ -91,7 +91,7 @@ define void @test_pr59090(ptr %l_out, ptr noalias %b) #0 {
 ; CHECK-NEXT:    store i8 0, ptr [[ARRAYIDX97]], align 1, !llvm.access.group [[ACC_GRP0]]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 10000
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
index 5ec7c1d45f8cf1..85d6c801dee690 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-native-inner-loop-only.ll
@@ -30,7 +30,7 @@ define void @test(ptr %A) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[STRIDED_VEC]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP13]], i32 0
 ; CHECK-NEXT:    store i32 [[TMP14]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll
index 12f95e0a0a7dc8..6f682c2d60f3f8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll
@@ -14,9 +14,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
@@ -27,14 +27,14 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD1]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], <i64 128, i64 128, i64 128, i64 128>
+; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[TMP13:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4)
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128)
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD4]], splat (i64 128)
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD5]], splat (i64 128)
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128)
 ; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i64> [[TMP12]], i32 0
 ; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[TMP26]], 1
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]]
@@ -47,7 +47,7 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr [[TMP34]], i32 4, <4 x i1> [[TMP18]])
 ; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP15]], ptr [[TMP35]], i32 4, <4 x i1> [[TMP19]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -120,10 +120,10 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP0]], 4
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], <i64 128, i64 128, i64 128, i64 128>
-; CHECK-NEXT:    [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], <i64 128, i64 128, i64 128, i64 128>
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], splat (i64 128)
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD1]], splat (i64 128)
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD2]], splat (i64 128)
+; CHECK-NEXT:    [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128)
 ; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[TMP15]], 1
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP23]]
 ; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i64, ptr [[TMP24]], i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index 968058134690bc..5af8591fd89170 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -40,7 +40,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -124,7 +124,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -148,7 +148,7 @@ define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr no
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP3]], i32 1, <8 x i1> [[TMP0]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1016
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.body:
@@ -213,7 +213,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -297,7 +297,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -322,7 +322,7 @@ define dso_local void @masked_strided1_optsize(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP4]], i32 1, <8 x i1> [[TMP0]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP5]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -402,7 +402,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -486,7 +486,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -521,7 +521,7 @@ define dso_local void @masked_strided1_optsize_unknown_tc(ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -606,7 +606,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nsw <8 x i32> [[VEC_IND]], splat (i32 3)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -690,7 +690,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP51]], ptr [[TMP52]], i32 1, <8 x i1> [[TMP3]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -725,7 +725,7 @@ define dso_local void @masked_strided3_optsize_unknown_tc(ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[STRIDED_VEC]], ptr [[TMP6]], i32 1, <8 x i1> [[TMP4]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP7]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -792,7 +792,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read
 ; DISABLED_MASKED_STRIDED:       vector.body:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = extractelement <8 x i32> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i32 [[TMP1]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <8 x i32> [[TMP0]], i64 1
@@ -828,7 +828,7 @@ define dso_local void @unconditional_strided1_optsize(ptr noalias nocapture read
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    store <8 x i8> [[TMP32]], ptr [[TMP33]], align 1
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP34]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -906,7 +906,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -990,7 +990,7 @@ define dso_local void @unconditional_strided1_optsize_unknown_tc(ptr noalias noc
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i8, ptr [[Q:%.*]], i32 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v8i8.p0(<8 x i8> [[TMP49]], ptr [[TMP50]], i32 1, <8 x i1> [[TMP0]])
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -1088,7 +1088,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -1169,7 +1169,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
@@ -1397,7 +1397,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -1425,7 +1425,7 @@ define dso_local void @masked_strided2(ptr noalias nocapture readonly %p, ptr no
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP6]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP8]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -1493,7 +1493,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 1024, i32 1023, i32 1022, i32 1021, i32 1020, i32 1019, i32 1018, i32 1017>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -1574,7 +1574,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
@@ -1802,7 +1802,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -1818,7 +1818,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 1024, i32 1023, i32 1022, i32 1021, i32 1020, i32 1019, i32 1018, i32 1017>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ugt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if:
@@ -1899,7 +1899,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; ENABLED_MASKED_STRIDED:       pred.load.continue14:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.load.if15:
@@ -2127,7 +2127,7 @@ define dso_local void @masked_strided2_reverse(ptr noalias nocapture readonly %p
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue60:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 -8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP165]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -2213,7 +2213,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp sgt <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP0]], <8 x i1> [[TMP1]], <8 x i1> zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -2294,7 +2294,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE16]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue16:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = phi <8 x i8> [ [[TMP45]], [[PRED_LOAD_CONTINUE14]] ], [ [[TMP50]], [[PRED_LOAD_IF15]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP52:%.*]] = or disjoint <8 x i32> [[TMP2]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP53:%.*]] = extractelement <8 x i1> [[TMP3]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP53]], label [[PRED_LOAD_IF17:%.*]], label [[PRED_LOAD_CONTINUE18:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if17:
@@ -2522,7 +2522,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE62]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue62:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP167:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP167]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -2559,7 +2559,7 @@ define dso_local void @masked_strided2_unknown_tc(ptr noalias nocapture readonly
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
 ; ENABLED_MASKED_STRIDED-NEXT:    call void @llvm.masked.store.v16i8.p0(<16 x i8> [[INTERLEAVED_VEC]], ptr [[TMP8]], i32 1, <16 x i1> [[INTERLEAVED_MASK]])
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP10]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
@@ -2647,7 +2647,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE60:%.*]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE60]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <8 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <8 x i32> [[VEC_IND]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if:
@@ -2728,7 +2728,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
 ; DISABLED_MASKED_STRIDED:       pred.load.continue14:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP49:%.*]] = phi <8 x i8> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP48]], [[PRED_LOAD_IF13]] ]
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP50:%.*]] = or disjoint <8 x i32> [[TMP1]], splat (i32 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP51:%.*]] = extractelement <8 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP51]], label [[PRED_LOAD_IF15:%.*]], label [[PRED_LOAD_CONTINUE16:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.load.if15:
@@ -2956,7 +2956,7 @@ define dso_local void @unconditional_masked_strided2_unknown_tc(ptr noalias noca
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE60]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue60:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP165:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP165]], label [[FOR_END]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
index b850dc3ecef85d..bd54f87b504896 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll
@@ -27,7 +27,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur
 ; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = shl nuw nsw <4 x i64> [[VEC_IND]], splat (i64 2)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[POINTS:%.*]], i64 [[TMP2]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i64 1
@@ -46,7 +46,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP13]], ptr [[TMP9]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 2
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], <i64 1, i64 1, i64 1, i64 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP15:%.*]] = or disjoint <4 x i64> [[TMP1]], splat (i64 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP16:%.*]] = extractelement <4 x i64> [[TMP15]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[POINTS]], i64 [[TMP16]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP18:%.*]] = extractelement <4 x i64> [[TMP15]], i64 1
@@ -64,7 +64,7 @@ define dso_local void @test1(ptr noalias nocapture %points, ptr noalias nocaptur
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
 ; DISABLED_MASKED_STRIDED-NEXT:    store i16 [[TMP27]], ptr [[TMP23]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP28]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -141,7 +141,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nsw <4 x i64> [[VEC_IND]], splat (i64 2)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
@@ -180,7 +180,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED:       pred.store.continue6:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD7:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP19]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], <i64 1, i64 1, i64 1, i64 1>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP20:%.*]] = or disjoint <4 x i64> [[TMP2]], splat (i64 1)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP21]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if8:
@@ -218,7 +218,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE15]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue15:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP37]], label [[FOR_END_LOOPEXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end.loopexit:
@@ -247,7 +247,7 @@ define dso_local void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP1]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = shl nsw i64 [[INDEX]], 3
-; ENABLED_MASKED_STRIDED-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.+]], i64 [[TMP2]]
+; ENABLED_MASKED_STRIDED-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[POINTS:%.*]], i64 [[TMP2]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[Y:%.*]], i64 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_MASKED_LOAD3:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr [[TMP3]], i32 2, <4 x i1> [[TMP0]], <4 x i16> poison)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_MASKED_LOAD]], <4 x i16> [[WIDE_MASKED_LOAD3]], <16 x i32> <i32 0, i32 4, i32 poison, i32 poison, i32 1, i32 5, i32 poison, i32 poison, i32 2, i32 6, i32 poison, i32 poison, i32 3, i32 7, i32 poison, i32 poison>
@@ -314,7 +314,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
 ; DISABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
-; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
+; DISABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; DISABLED_MASKED_STRIDED:       pred.store.if:
@@ -352,7 +352,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; DISABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; DISABLED_MASKED_STRIDED:       pred.store.continue6:
 ; DISABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; DISABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; DISABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; DISABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; DISABLED_MASKED_STRIDED:       for.end:
@@ -367,7 +367,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i64 [[INDEX]]
 ; ENABLED_MASKED_STRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP0]], align 2
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i16> [[WIDE_LOAD]], zeroinitializer
-; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
+; ENABLED_MASKED_STRIDED-NEXT:    [[TMP2:%.*]] = mul nuw nsw <4 x i64> [[VEC_IND]], splat (i64 3)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; ENABLED_MASKED_STRIDED:       pred.store.if:
@@ -405,7 +405,7 @@ define dso_local void @test(ptr noalias nocapture %points, ptr noalias nocapture
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; ENABLED_MASKED_STRIDED:       pred.store.continue6:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; ENABLED_MASKED_STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; ENABLED_MASKED_STRIDED-NEXT:    br i1 [[TMP19]], label [[FOR_END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; ENABLED_MASKED_STRIDED:       for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
index eee1b6f35d1b70..126bbf9afc34e8 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll
@@ -227,7 +227,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], <i64 777, i64 777>
+; CHECK-NEXT:    [[TMP0:%.*]] = mul <2 x i64> [[VEC_IND]], splat (i64 777)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
 ; CHECK:       pred.udiv.if:
@@ -253,7 +253,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]]
 ; CHECK-NEXT:    [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       middle.block:
@@ -301,7 +301,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE14:%.*]] ]
 ; SINK-GATHER-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE14]] ]
 ; SINK-GATHER-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE14]] ]
-; SINK-GATHER-NEXT:    [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], <i64 777, i64 777, i64 777, i64 777, i64 777, i64 777, i64 777, i64 777>
+; SINK-GATHER-NEXT:    [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777)
 ; SINK-GATHER-NEXT:    [[TMP1:%.*]] = extractelement <8 x i1> [[BROADCAST_SPLAT]], i32 0
 ; SINK-GATHER-NEXT:    br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
 ; SINK-GATHER:       pred.udiv.if:
@@ -393,7 +393,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) {
 ; SINK-GATHER-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]]
 ; SINK-GATHER-NEXT:    [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]]
 ; SINK-GATHER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; SINK-GATHER-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; SINK-GATHER-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; SINK-GATHER-NEXT:    [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; SINK-GATHER-NEXT:    br i1 [[TMP67]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; SINK-GATHER:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
index 8b06d9f61e5da1..7a385993a97991 100644
--- a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
+++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll
@@ -115,7 +115,7 @@ define i64 @invar_cond(i1 %c) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> splat (i64 1)
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -175,7 +175,7 @@ define i64 @invar_cond_incoming_ops_reordered(i1 %c) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i64> zeroinitializer
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> splat (i64 1), <4 x i64> zeroinitializer
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
index f2111081ffca9c..8397b1ed88dcdd 100644
--- a/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
+++ b/llvm/test/Transforms/LoopVectorize/bsd_regex.ll
@@ -17,9 +17,9 @@ define i32 @foo(ptr nocapture %A) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP0]], i64 1
@@ -33,7 +33,7 @@ define i32 @foo(ptr nocapture %A) {
 ; CHECK-NEXT:    store i32 4, ptr [[TMP7]], align 4
 ; CHECK-NEXT:    store i32 4, ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -41,7 +41,7 @@ define i32 @foo(ptr nocapture %A) {
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret i32 undef
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
index aea383226c71e6..c745b4f74786c9 100644
--- a/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
+++ b/llvm/test/Transforms/LoopVectorize/create-induction-resume.ll
@@ -49,7 +49,7 @@ define void @test(i32 %arg, i32 %L1.limit, i32 %L2.switch, i1 %c, ptr %dst) {
 ; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDUCTION_IV_LCSSA1]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> <i32 0, i32 1, i32 2, i32 3>, [[DOTSPLAT]]
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP4]]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> splat (i32 1), [[TMP4]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[INDUCTION_IV_LCSSA1]], 4
 ; CHECK-NEXT:    [[DOTSPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[DOTSPLAT4:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
index 3bf2591391121e..2f760a05c31ea6 100644
--- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
+++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll
@@ -19,19 +19,19 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[H]], <4 x i64> [[VEC_PHI]], !dbg [[DBG21]]
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> zeroinitializer, <4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22:![0-9]+]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 1, !dbg [[DBG22]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 1), <4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 2, !dbg [[DBG22]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> <i32 2, i32 2, i32 2, i32 2>, <4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 2), <4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, <4 x ptr> [[TMP0]], i64 3, !dbg [[DBG22]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x ptr> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
-; CHECK-NEXT:    [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], <i64 1, i64 1, i64 1, i64 1>, !dbg [[DBG24:![0-9]+]]
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], <i64 5, i64 5, i64 5, i64 5>, !dbg [[DBG25:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> splat (i32 3), <4 x ptr> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>), !dbg [[DBG22]]
+; CHECK-NEXT:    [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1), !dbg [[DBG24:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], splat (i64 5), !dbg [[DBG25:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0, !dbg [[DBG26:![0-9]+]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_COND_CLEANUP32]], label [[FOR_COND5_PREHEADER1]], !dbg [[DBG26]]
 ; CHECK:       vector.latch:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]], !dbg [[DBG21]]
 ; CHECK:       scalar.ph:
@@ -45,19 +45,19 @@ define void @foo(ptr %h) !dbg !4 {
 ; CHECK-NEXT:    [[L_022:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INC10:%.*]], [[FOR_COND5_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[H]], i64 [[L_022]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 4, !dbg [[DBG22]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG33:![0-9]+]]
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP10]], i64 1, !dbg [[DBG31:![0-9]+]]
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX_1]], align 4, !dbg [[DBG22]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG33]]
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP10]], i64 2, !dbg [[DBG31]]
 ; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX_2]], align 4, !dbg [[DBG22]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG33]]
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP10]], i64 3, !dbg [[DBG31]]
 ; CHECK-NEXT:    store i32 3, ptr [[ARRAYIDX_3]], align 4, !dbg [[DBG22]]
 ; CHECK-NEXT:    [[INC10]] = add nuw nsw i64 [[L_022]], 1, !dbg [[DBG24]]
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC10]], 5, !dbg [[DBG25]]
 ; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_COND5_PREHEADER]], !dbg [[DBG26]]
 ; CHECK:       for.cond.cleanup3:
-; CHECK-NEXT:    [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27:![0-9]+]]
+; CHECK-NEXT:    [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG32:![0-9]+]]
 ; CHECK-NEXT:      #dbg_value(i64 [[INC13]], [[META11]], !DIExpression(), [[META20]])
-; CHECK-NEXT:    [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28:![0-9]+]]
+; CHECK-NEXT:    [[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG33:![0-9]+]]
 ; CHECK-NEXT:    br i1 [[EXITCOND24_NOT]], label [[EXIT]], label [[FOR_COND1_PREHEADER]], !dbg [[DBG21]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void, !dbg [[DBG35:![0-9]+]]
@@ -160,13 +160,13 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 ; CHECK: [[DBG24]] = !DILocation(line: 11, column: 32, scope: [[META19]])
 ; CHECK: [[DBG25]] = !DILocation(line: 11, column: 26, scope: [[META19]])
 ; CHECK: [[DBG26]] = !DILocation(line: 11, column: 5, scope: [[META15]])
-; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[DBG21]], [[META30:![0-9]+]], [[META31:![0-9]+]], [[META32:![0-9]+]]}
-; CHECK: [[META30]] = !DILocation(line: 13, column: 13, scope: [[META12]])
-; CHECK: [[META31]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META32]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[DBG33]] = !DILocation(line: 13, column: 2, scope: [[META23]])
-; CHECK: [[DBG27]] = !DILocation(line: 10, column: 30, scope: [[META16]])
-; CHECK: [[DBG28]] = !DILocation(line: 10, column: 24, scope: [[META16]])
-; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META30]], [[META31]]}
+; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[DBG21]], [[META28:![0-9]+]], [[META29:![0-9]+]], [[META30:![0-9]+]]}
+; CHECK: [[META28]] = !DILocation(line: 13, column: 13, scope: [[META12]])
+; CHECK: [[META29]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META30]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[DBG31]] = !DILocation(line: 13, column: 2, scope: [[META23]])
+; CHECK: [[DBG32]] = !DILocation(line: 10, column: 30, scope: [[META16]])
+; CHECK: [[DBG33]] = !DILocation(line: 10, column: 24, scope: [[META16]])
+; CHECK: [[LOOP34]] = distinct !{[[LOOP34]], [[DBG21]], [[META28]], [[META29]]}
 ; CHECK: [[DBG35]] = !DILocation(line: 14, column: 1, scope: [[DBG4]])
 ;.
diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
index aae9393bbe0dd2..b6a97a4f77585a 100644
--- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll
@@ -46,16 +46,16 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) {
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT:    [[R:%.*]] = phi i64 [ [[TMP2:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT:    [[R:%.*]] = phi i64 [ [[TMP4:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2]] = add i64 [[TMP1]], [[R]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP4]] = add i64 [[TMP2]], [[R]]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[COND]], label %[[FOR_BODY]], label %[[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[FOR_END]]:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i64 [ [[TMP2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = phi i64 [ [[TMP4]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[TMP5]]
 ;
 entry:
   br label %for.body
@@ -155,8 +155,8 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META6]], !noalias [[META9]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1)
 ; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[TMP4]], align 4, !alias.scope [[META6]], !noalias [[META9]]
 ; CHECK-NEXT:    store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META6]], !noalias [[META9]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP0]]
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
index 274174cceeb1c2..1e37b7be662dfd 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll
@@ -19,9 +19,9 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize {
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4
-; CHECK-NEXT:    store <2 x i32> <i32 13, i32 13>, ptr [[TMP6]], align 1
-; CHECK-NEXT:    store <2 x i32> <i32 13, i32 13>, ptr [[TMP7]], align 1
-; CHECK-NEXT:    store <2 x i32> <i32 13, i32 13>, ptr [[TMP8]], align 1
+; CHECK-NEXT:    store <2 x i32> splat (i32 13), ptr [[TMP6]], align 1
+; CHECK-NEXT:    store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1
+; CHECK-NEXT:    store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 6
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
index 76ca2507b914cf..53686ee76cbbdb 100644
--- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
+++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll
@@ -21,7 +21,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize {
 ; CHECK-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]]
-; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
@@ -86,7 +86,7 @@ define dso_local void @assumeAlignedTC(ptr noalias nocapture %A, i32 %p, i32 %q)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    store <4 x i32> splat (i32 13), ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -198,7 +198,7 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
index 5f2e91b8d1f32d..9d24e369b3737e 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll
@@ -226,7 +226,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -264,7 +264,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]]
 ; CHECK-NEXT:    [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index f78654176e23eb..3c1dd1bd8b6d13 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -201,7 +201,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
@@ -228,7 +228,7 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
 ; CHECK-NEXT:    [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ [[TMP12]], [[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[INDEX1]], 0
-; CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i32> [[VEC_PHI2]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[PTR]], i32 [[TMP13]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2
@@ -327,7 +327,7 @@ define float @multiple_fp_rdx(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT:    [[N_MOD_VF4:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> splat (float 1.000000e+00), float [[BC_MERGE_RDX]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0
 ; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
 ; CHECK:       vec.epilog.vector.body:
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
index 756b0ab9612b8b..7a92d1a1c9ea5f 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-trunc-induction-steps.ll
@@ -30,7 +30,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
-; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 2)
 ; CHECK-NEXT:    store <4 x i8> [[TMP9]], ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -54,7 +54,7 @@ define void @trunc_iv_steps_with_epilogue(ptr %A, i64 %N) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[A]], i32 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP14]], align 1
-; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP15:%.*]] = add <4 x i8> [[WIDE_LOAD6]], splat (i8 2)
 ; CHECK-NEXT:    store <4 x i8> [[TMP15]], ptr [[TMP14]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX5]], 4
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT7]], [[N_VEC3]]
diff --git a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
index 47636b2c66d296..f648de1358998e 100644
--- a/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
+++ b/llvm/test/Transforms/LoopVectorize/extract-last-veclane.ll
@@ -13,7 +13,7 @@ define void @inv_store_last_lane(ptr noalias nocapture %a, ptr noalias nocapture
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -74,7 +74,7 @@ define float @ret_last_lane(ptr noalias nocapture %a, ptr noalias nocapture read
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
index eda92aae095ddf..0f206bfb92fa01 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll
@@ -445,9 +445,9 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP6]], ptr [[TMP9]], align 4
@@ -535,10 +535,10 @@ define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr n
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP10]], align 4
@@ -696,8 +696,8 @@ define i16 @multiple_exit(ptr %p, i32 %n) {
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT:    br label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -772,8 +772,8 @@ define i16 @multiple_exit2(ptr %p, i32 %n) {
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT:    br label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
 ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -841,8 +841,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) {
 ; CHECK-NEXT:    [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], <i32 213, i32 213, i32 213, i32 213>
-; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> <i32 22, i32 22, i32 22, i32 22>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
+; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> splat (i32 22)
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -852,8 +852,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) {
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
@@ -922,9 +922,9 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) {
 ; CHECK-NEXT:    [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], <i32 99, i32 99, i32 99, i32 99>
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], <i32 213, i32 213, i32 213, i32 213>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], splat (i32 2)
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], splat (i32 99)
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
 ; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP11]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -1045,7 +1045,7 @@ define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x double> [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
@@ -1114,7 +1114,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
 ; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP3]]
 ; CHECK-NEXT:    store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -1125,7 +1125,7 @@ define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index dbe373b46cce21..981e434aef7bdf 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -123,9 +123,9 @@ define void @test_pr54223_sink_after_insertion_order(ptr noalias %a, ptr noalias
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
@@ -230,11 +230,11 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT:    [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], splat (i32 10)
+; CHECK-NEXT:    [[TMP4]] = xor <4 x i32> splat (i32 12), [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
@@ -246,9 +246,9 @@ define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %
 ; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
 ; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index c4e3e0b8c5a369..4d42e9b38164f5 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -901,9 +901,9 @@ define i32 @PR27246() {
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 -4)
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -996,7 +996,7 @@ define i32 @PR27246() {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; SINK-AFTER:       middle.block:
@@ -1240,7 +1240,7 @@ define i64 @constant_folded_previous_value() {
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -1297,7 +1297,7 @@ define i64 @constant_folded_previous_value() {
 ; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; SINK-AFTER:       vector.body:
 ; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ splat (i64 1), [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; SINK-AFTER-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -1351,10 +1351,10 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP0]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -1423,7 +1423,7 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x)  {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; SINK-AFTER-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
 ; SINK-AFTER-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; SINK-AFTER:       middle.block:
@@ -1978,8 +1978,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
-; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP17]], align 4
-; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP18]], align 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> splat (i32 7), ptr [[TMP17]], align 4
+; UNROLL-NO-IC-NEXT:    store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2
 ; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2
 ; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2
@@ -2120,7 +2120,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
 ; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1
 ; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1
 ; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; SINK-AFTER-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, ptr [[TMP9]], align 4
+; SINK-AFTER-NEXT:    store <4 x i32> splat (i32 7), ptr [[TMP9]], align 4
 ; SINK-AFTER-NEXT:    [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2
 ; SINK-AFTER-NEXT:    [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2
 ; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2
@@ -2221,8 +2221,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
 ; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], <i32 2, i32 2, i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], splat (i32 2)
+; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
 ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]]
@@ -2339,7 +2339,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64
 ; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP3]], align 2
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], <i32 2, i32 2, i32 2, i32 2>
+; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], splat (i32 2)
 ; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
 ; SINK-AFTER-NEXT:    [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP6]], [[TMP7]]
 ; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]]
@@ -2510,26 +2510,26 @@ define void @sink_dead_inst(ptr %a) {
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
-; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
+; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
+; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
 ; UNROLL-NO-IC-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
-; UNROLL-NO-IC-NEXT:    [[TMP5]] = add <4 x i16> [[TMP2]], <i16 5, i16 5, i16 5, i16 5>
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[TMP1]], splat (i16 5)
+; UNROLL-NO-IC-NEXT:    [[TMP5]] = add <4 x i16> [[TMP2]], splat (i16 5)
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], <i16 10, i16 10, i16 10, i16 10>
-; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], <i16 10, i16 10, i16 10, i16 10>
+; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10)
+; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10)
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]]
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[TMP10]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4
 ; UNROLL-NO-IC-NEXT:    store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2
 ; UNROLL-NO-IC-NEXT:    store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -2618,16 +2618,16 @@ define void @sink_dead_inst(ptr %a) {
 ; SINK-AFTER-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
 ; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST]]
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
-; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
+; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
 ; SINK-AFTER-NEXT:    [[TMP2]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-; SINK-AFTER-NEXT:    [[TMP3]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
+; SINK-AFTER-NEXT:    [[TMP3]] = add <4 x i16> [[TMP1]], splat (i16 5)
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], <i16 10, i16 10, i16 10, i16 10>
+; SINK-AFTER-NEXT:    [[TMP5:%.*]] = sub <4 x i16> [[TMP4]], splat (i16 10)
 ; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[TMP0]]
 ; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[TMP6]], i32 0
 ; SINK-AFTER-NEXT:    store <4 x i16> [[TMP5]], ptr [[TMP7]], align 2
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; SINK-AFTER-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
 ; SINK-AFTER-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; SINK-AFTER:       middle.block:
@@ -3001,7 +3001,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE30]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE30]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE30]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
@@ -3148,7 +3148,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; UNROLL-NO-IC-NEXT:    [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
 ; UNROLL-NO-IC-NEXT:    [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -3357,7 +3357,7 @@ define i32 @sink_into_replication_region_multiple(ptr %x, i32 %y) {
 ; SINK-AFTER:       pred.store.continue13:
 ; SINK-AFTER-NEXT:    [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; SINK-AFTER-NEXT:    [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; SINK-AFTER-NEXT:    br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF27]], !llvm.loop [[LOOP32:![0-9]+]]
 ; SINK-AFTER:       middle.block:
@@ -3421,10 +3421,10 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
-; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
+; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
 ; UNROLL-NO-IC-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
@@ -3433,7 +3433,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
 ; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
 ; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -3521,14 +3521,14 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
 ; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
-; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
+; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
 ; SINK-AFTER-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
 ; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
 ; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
 ; SINK-AFTER-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; SINK-AFTER-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; SINK-AFTER-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; SINK-AFTER:       middle.block:
@@ -3592,11 +3592,11 @@ define void @unused_recurrence(ptr %a) {
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
-; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
-; UNROLL-NO-IC-NEXT:    [[TMP1]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
+; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 1)
+; UNROLL-NO-IC-NEXT:    [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5)
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -3659,10 +3659,10 @@ define void @unused_recurrence(ptr %a) {
 ; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
-; SINK-AFTER-NEXT:    [[TMP1]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
+; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 1)
+; SINK-AFTER-NEXT:    [[TMP1]] = add <4 x i16> [[TMP0]], splat (i16 5)
 ; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
+; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4)
 ; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1028
 ; SINK-AFTER-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; SINK-AFTER:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index cedaf019a958bd..43299676d87c6b 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -50,7 +50,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -67,7 +67,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VEC4_INTERL1:       for.end.loopexit:
 ; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL1:       for.end:
@@ -88,7 +88,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
 ; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]]
 ; VEC4_INTERL2-NEXT:    [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
-; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison
+; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison, float poison, float poison>
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
@@ -108,7 +108,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -125,7 +125,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VEC4_INTERL2:       for.end.loopexit:
 ; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL2:       for.end:
@@ -159,7 +159,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC1_INTERL2-NEXT:    store float [[TMP4]], ptr [[TMP6]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -176,7 +176,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VEC1_INTERL2:       for.end.loopexit:
 ; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC1_INTERL2:       for.end:
@@ -214,7 +214,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
@@ -227,7 +227,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
@@ -297,7 +297,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -314,7 +314,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VEC4_INTERL1:       for.end.loopexit:
 ; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL1:       for.end:
@@ -335,12 +335,12 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]]
 ; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]]
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
-; VEC4_INTERL2-NEXT:    [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison
+; VEC4_INTERL2-NEXT:    [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison>
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
-; VEC4_INTERL2-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
+; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT3:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
+; VEC4_INTERL2-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT3]], <4 x float> poison, <4 x i32> zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
 ; VEC4_INTERL2-NEXT:    [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]]
 ; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -355,7 +355,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -372,7 +372,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VEC4_INTERL2:       for.end.loopexit:
 ; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL2:       for.end:
@@ -408,7 +408,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC1_INTERL2-NEXT:    store float [[TMP6]], ptr [[TMP8]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -425,7 +425,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VEC1_INTERL2:       for.end.loopexit:
 ; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC1_INTERL2:       for.end:
@@ -463,7 +463,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
@@ -476,7 +476,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
@@ -539,9 +539,9 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -558,7 +558,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VEC4_INTERL1:       for.end.loopexit:
 ; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL1:       for.end:
@@ -584,15 +584,15 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2:       vector.body:
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -609,7 +609,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VEC4_INTERL2:       for.end.loopexit:
 ; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL2:       for.end:
@@ -642,7 +642,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2-NEXT:    store float [[TMP4]], ptr [[TMP6]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -659,7 +659,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VEC1_INTERL2:       for.end.loopexit:
 ; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC1_INTERL2:       for.end:
@@ -688,9 +688,9 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
@@ -703,7 +703,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
@@ -783,17 +783,17 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
 ; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
-; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
+; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01)
 ; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]]
 ; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP9]], ptr [[TMP10]], align 4
 ; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP8]], ptr [[TMP11]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
+; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00)
 ; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
 ; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -818,7 +818,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VEC4_INTERL1:       for.end.loopexit:
 ; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL1:       for.end:
@@ -843,7 +843,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]]
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
 ; VEC4_INTERL2-NEXT:    [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], <float 4.000000e+00, float 4
+; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
 ; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
@@ -862,8 +862,8 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4
 ; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST]]
 ; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST]]
-; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
-; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -2.500000e+00, float -2.500000e+00, float -2.500000e+00, float -2.500000e+00>
+; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01)
+; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.500000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]]
 ; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]]
 ; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
@@ -875,10 +875,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP10]], ptr [[TMP16]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP17]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00>
+; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -4.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT5]]
 ; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -903,7 +903,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VEC4_INTERL2:       for.end.loopexit:
 ; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL2:       for.end:
@@ -956,7 +956,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC1_INTERL2-NEXT:    store float [[TMP13]], ptr [[TMP19]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -981,7 +981,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VEC1_INTERL2:       for.end.loopexit:
 ; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC1_INTERL2:       for.end:
@@ -1023,17 +1023,17 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01>
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP9]], ptr [[TMP10]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP8]], ptr [[TMP11]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float -1.000000e+00, float -1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
@@ -1053,7 +1053,7 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
@@ -1121,9 +1121,9 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -1140,7 +1140,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VEC4_INTERL1:       for.end.loopexit:
 ; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL1:       for.end:
@@ -1163,15 +1163,15 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2:       vector.body:
 ; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00)
 ; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -1188,7 +1188,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VEC4_INTERL2:       for.end.loopexit:
 ; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC4_INTERL2:       for.end:
@@ -1221,7 +1221,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2-NEXT:    store float [[TMP4]], ptr [[TMP6]], align 4
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -1238,7 +1238,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VEC1_INTERL2:       for.end.loopexit:
 ; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
 ; VEC1_INTERL2:       for.end:
@@ -1264,9 +1264,9 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00)
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
@@ -1279,7 +1279,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
@@ -1361,7 +1361,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1:       pred.store.continue8:
 ; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL1-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; VEC4_INTERL1:       middle.block:
 ; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1383,7 +1383,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL1-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; VEC4_INTERL1-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
 ; VEC4_INTERL1-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; VEC4_INTERL1-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; VEC4_INTERL1-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VEC4_INTERL1:       for.end:
 ; VEC4_INTERL1-NEXT:    ret void
 ;
@@ -1477,7 +1477,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2:       pred.store.continue17:
 ; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC4_INTERL2-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; VEC4_INTERL2:       middle.block:
 ; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1499,7 +1499,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC4_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; VEC4_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
 ; VEC4_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; VEC4_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; VEC4_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VEC4_INTERL2:       for.end:
 ; VEC4_INTERL2-NEXT:    ret void
 ;
@@ -1535,7 +1535,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC1_INTERL2:       pred.store.continue4:
 ; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; VEC1_INTERL2:       middle.block:
 ; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1557,7 +1557,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC1_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; VEC1_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
 ; VEC1_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; VEC1_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; VEC1_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VEC1_INTERL2:       for.end:
 ; VEC1_INTERL2-NEXT:    ret void
 ;
@@ -1594,7 +1594,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE:       pred.store.continue4:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       middle.block:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
 ; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -1612,7 +1612,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) {
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
 ; VEC2_INTERL1_PRED_STORE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
+; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VEC2_INTERL1_PRED_STORE:       for.end:
 ; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
index 8b0c99b353c8b7..e3278c2c54ceb8 100644
--- a/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-conversion-nest.ll
@@ -33,11 +33,11 @@ define i32 @foo(ptr nocapture %A, ptr nocapture %B, i32 %n) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META3]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
+; CHECK-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP10]], <4 x i32> splat (i32 3)
+; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[PREDPHI]], <4 x i32> splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META0]], !noalias [[META3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -140,16 +140,16 @@ define i32 @multi_variable_if_nest(ptr nocapture %A, ptr nocapture %B, i32 %n) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 19, i32 19, i32 19, i32 19>
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 19)
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP10:%.*]] = and <4 x i1> [[TMP7]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], <i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> <i32 4, i32 4, i32 4, i32 4>, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> <i32 6, i32 6, i32 6, i32 6>, <4 x i32> <i32 11, i32 11, i32 11, i32 11>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD2]], splat (i32 4)
+; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 4), <4 x i32> splat (i32 5)
+; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> splat (i32 6), <4 x i32> splat (i32 11)
 ; CHECK-NEXT:    [[TMP14:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 3), <4 x i32> splat (i32 9)
 ; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP12]], <4 x i32> [[PREDPHI]]
-; CHECK-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32> <i32 18, i32 18, i32 18, i32 18>
+; CHECK-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP14]], <4 x i32> splat (i32 7), <4 x i32> splat (i32 18)
 ; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP13]], <4 x i32> [[PREDPHI4]]
 ; CHECK-NEXT:    store <4 x i32> [[PREDPHI3]], ptr [[TMP5]], align 4, !alias.scope [[META9]], !noalias [[META12]]
 ; CHECK-NEXT:    store <4 x i32> [[PREDPHI5]], ptr [[TMP6]], align 4, !alias.scope [[META12]]
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
index ecb57c539a40ef..cf7f0a36f3e7ac 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll
@@ -46,18 +46,18 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope !12, !noalias !13
+; CHECK-NEXT:    [[WIDE_LOAD23:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
+; CHECK-NEXT:    [[WIDE_LOAD24:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope !15
-; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], <i32 24, i32 24>
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], <i32 25, i32 25>
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], <i32 26, i32 26>
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
+; CHECK-NEXT:    [[WIDE_LOAD25:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META15]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23)
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[WIDE_LOAD23]], splat (i32 24)
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i32> [[WIDE_LOAD24]], splat (i32 25)
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i32> [[WIDE_LOAD25]], splat (i32 26)
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
 ; CHECK:       pred.urem.if:
@@ -114,13 +114,13 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; CHECK-NEXT:    [[PREDPHI29:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP11]], <2 x i32> [[TMP54]]
 ; CHECK-NEXT:    [[PREDPHI30:%.*]] = select <2 x i1> [[TMP56]], <2 x i32> [[TMP12]], <2 x i32> [[TMP55]]
 ; CHECK-NEXT:    [[TMP57:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP57]], align 4, !alias.scope [[META5]], !noalias [[META8]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope !12, !noalias !13
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI28]], ptr [[TMP58]], align 4, !alias.scope [[META12]], !noalias [[META13]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope !14, !noalias !15
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI29]], ptr [[TMP59]], align 4, !alias.scope [[META14]], !noalias [[META15]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope !15
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI30]], ptr [[TMP60]], align 4, !alias.scope [[META15]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP61:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP61]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -207,14 +207,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[ASR]], i64 [[TMP1]]
 ; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[AUR]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope !15
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope !15
+; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META13:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META14:![0-9]+]], !noalias [[META15:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META15]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META15]]
 ; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = add nsw i32 [[TMP10]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = add nsw i32 [[TMP11]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = add nsw i32 [[TMP12]], 24
@@ -259,14 +259,14 @@ define void @test(ptr nocapture %asd, ptr nocapture %aud,
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI29:%.*]] = select i1 [[TMP45]], i32 [[TMP23]], i32 [[TMP42]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI30:%.*]] = select i1 [[TMP44]], i32 [[TMP24]], i32 [[TMP35]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI31:%.*]] = select i1 [[TMP45]], i32 [[TMP25]], i32 [[TMP43]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope !5, !noalias !8
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope !12, !noalias !13
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope !14, !noalias !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope !15
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope !15
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI25]], ptr [[TMP3]], align 4, !alias.scope [[META5]], !noalias [[META8]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI26]], ptr [[TMP4]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI27]], ptr [[TMP5]], align 4, !alias.scope [[META12]], !noalias [[META13]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI28]], ptr [[TMP6]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI29]], ptr [[TMP7]], align 4, !alias.scope [[META14]], !noalias [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI30]], ptr [[TMP8]], align 4, !alias.scope [[META15]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI31]], ptr [[TMP9]], align 4, !alias.scope [[META15]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -371,12 +371,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !23
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META23]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
 ; CHECK:       pred.sdiv.if:
@@ -404,7 +404,7 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP5]], <2 x i32> [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope !20, !noalias !23
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP26]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -448,12 +448,12 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !23
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !23
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META23]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100
@@ -476,8 +476,8 @@ define void @test_scalar2scalar(ptr nocapture %asd, ptr nocapture %bsd) {
 ; UNROLL-NO-VF-NEXT:    [[TMP23:%.*]] = xor i1 [[TMP13]], true
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[TMP17]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI4:%.*]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[TMP21]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !20, !noalias !23
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !20, !noalias !23
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META20]], !noalias [[META23]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META20]], !noalias [[META23]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -552,15 +552,15 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope !32
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], <i32 23, i32 23>
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>, !dbg [[DBG34:![0-9]+]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], <i32 200, i32 200>
-; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP7]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]]
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META32]]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[WIDE_LOAD]], splat (i32 23)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 100)
+; CHECK-NEXT:    [[TMP7:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>, !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], splat (i32 200)
+; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP8]], <2 x i1> zeroinitializer, !dbg [[DBG35:![0-9]+]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or <2 x i1> [[TMP9]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
@@ -586,11 +586,11 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; CHECK-NEXT:    br label [[PRED_SDIV_CONTINUE4]]
 ; CHECK:       pred.sdiv.continue4:
 ; CHECK-NEXT:    [[TMP28:%.*]] = phi <2 x i32> [ [[TMP19]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP26]], [[PRED_SDIV_IF3]] ]
-; CHECK-NEXT:    [[TMP29:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>, !dbg [[DBG35]]
-; CHECK-NEXT:    [[TMP30:%.*]] = select <2 x i1> [[TMP8]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer, !dbg [[DBG35]]
+; CHECK-NEXT:    [[TMP27:%.*]] = xor <2 x i1> [[TMP8]], <i1 true, i1 true>, !dbg [[DBG35]]
+; CHECK-NEXT:    [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP27]], <2 x i1> zeroinitializer, !dbg [[DBG35]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP5]], <2 x i32> [[TMP28]]
 ; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope !29, !noalias !32
+; CHECK-NEXT:    store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; CHECK-NEXT:    br i1 [[TMP32]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
@@ -636,22 +636,22 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
-; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32
+; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope [[META29:![0-9]+]], !noalias [[META32:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]]
 ; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]]
-; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !32
-; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !32
+; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META32]]
+; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP4]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add nsw i32 [[TMP5]], 23
 ; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp slt i32 [[TMP4]], 100
 ; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[TMP5]], 100
-; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]]
-; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]]
-; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp sge i32 [[TMP4]], 200
-; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = icmp sge i32 [[TMP5]], 200
-; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = select i1 [[TMP16]], i1 [[TMP14]], i1 false, !dbg [[DBG35:![0-9]+]]
-; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = select i1 [[TMP17]], i1 [[TMP15]], i1 false, !dbg [[DBG35]]
+; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = xor i1 [[TMP12]], true, !dbg [[DBG34:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = xor i1 [[TMP13]], true, !dbg [[DBG34]]
+; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp sge i32 [[TMP4]], 200, !dbg [[DBG34]]
+; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp sge i32 [[TMP5]], 200, !dbg [[DBG34]]
+; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = select i1 [[TMP14]], i1 [[TMP16]], i1 false, !dbg [[DBG35:![0-9]+]]
+; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = select i1 [[TMP15]], i1 [[TMP17]], i1 false, !dbg [[DBG35]]
 ; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = or i1 [[TMP18]], [[TMP12]]
 ; UNROLL-NO-VF-NEXT:    [[TMP21:%.*]] = or i1 [[TMP19]], [[TMP13]]
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP20]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]]
@@ -668,14 +668,14 @@ define void @pr30172(ptr nocapture %asd, ptr nocapture %bsd) !dbg !5 {;
 ; UNROLL-NO-VF-NEXT:    br label [[PRED_SDIV_CONTINUE3]]
 ; UNROLL-NO-VF:       pred.sdiv.continue3:
 ; UNROLL-NO-VF-NEXT:    [[TMP29:%.*]] = phi i32 [ poison, [[PRED_SDIV_CONTINUE]] ], [ [[TMP27]], [[PRED_SDIV_IF2]] ]
-; UNROLL-NO-VF-NEXT:    [[TMP30:%.*]] = xor i1 [[TMP14]], true, !dbg [[DBG35]]
-; UNROLL-NO-VF-NEXT:    [[TMP31:%.*]] = xor i1 [[TMP15]], true, !dbg [[DBG35]]
-; UNROLL-NO-VF-NEXT:    [[TMP32:%.*]] = select i1 [[TMP16]], i1 [[TMP30]], i1 false, !dbg [[DBG35]]
-; UNROLL-NO-VF-NEXT:    [[TMP33:%.*]] = select i1 [[TMP17]], i1 [[TMP31]], i1 false, !dbg [[DBG35]]
+; UNROLL-NO-VF-NEXT:    [[TMP28:%.*]] = xor i1 [[TMP16]], true, !dbg [[DBG35]]
+; UNROLL-NO-VF-NEXT:    [[TMP30:%.*]] = xor i1 [[TMP17]], true, !dbg [[DBG35]]
+; UNROLL-NO-VF-NEXT:    [[TMP32:%.*]] = select i1 [[TMP14]], i1 [[TMP28]], i1 false, !dbg [[DBG35]]
+; UNROLL-NO-VF-NEXT:    [[TMP33:%.*]] = select i1 [[TMP15]], i1 [[TMP30]], i1 false, !dbg [[DBG35]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI:%.*]] = select i1 [[TMP32]], i32 [[TMP10]], i32 [[TMP25]]
 ; UNROLL-NO-VF-NEXT:    [[PREDPHI4:%.*]] = select i1 [[TMP33]], i32 [[TMP11]], i32 [[TMP29]]
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope !29, !noalias !32
-; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope !29, !noalias !32
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI]], ptr [[TMP2]], align 4, !alias.scope [[META29]], !noalias [[META32]]
+; UNROLL-NO-VF-NEXT:    store i32 [[PREDPHI4]], ptr [[TMP3]], align 4, !alias.scope [[META29]], !noalias [[META32]]
 ; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; UNROLL-NO-VF-NEXT:    [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
 ; UNROLL-NO-VF-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
index 255768d8794c36..c42d007b522482 100644
--- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -99,7 +99,7 @@ define i32 @test(ptr nocapture %f) #0 {
 ; VEC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]]
 ; VEC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; VEC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
-; VEC-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 100, i32 100>
+; VEC-NEXT:    [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], splat (i32 100)
 ; VEC-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
 ; VEC-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VEC:       pred.store.if:
@@ -350,7 +350,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) {
 ; VEC-NEXT:    store i32 [[TMP15]], ptr [[TMP14]], align 4
 ; VEC-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; VEC:       pred.store.continue2:
-; VEC-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 1, i32 1>
+; VEC-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1)
 ; VEC-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
 ; VEC-NEXT:    [[PREDPHI]] = select <2 x i1> [[TMP17]], <2 x i32> [[VEC_PHI]], <2 x i32> [[TMP16]]
 ; VEC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index 383b62b368ef0f..514ca47a37635c 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1216,13 +1216,13 @@ define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
 ; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
@@ -1342,9 +1342,9 @@ define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonl
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
@@ -1604,7 +1604,7 @@ define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], <i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1686,7 +1686,7 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -1764,7 +1764,7 @@ define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index b061cf526b7809..adce8c2c0ff7eb 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -31,7 +31,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -72,7 +72,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; IND-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; IND:       middle.block:
@@ -110,13 +110,13 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
@@ -163,7 +163,7 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP5]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -201,13 +201,13 @@ define void @multi_int_induction(ptr %A, i32 %N) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 190, i32 191, i32 192, i32 193>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -891,7 +891,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
 ; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1
-; CHECK-NEXT:    [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -951,7 +951,7 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
 ; IND-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
 ; IND-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
 ; IND-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1
-; IND-NEXT:    [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
+; IND-NEXT:    [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00)
 ; IND-NEXT:    [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]]
 ; IND-NEXT:    [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]]
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
@@ -1026,8 +1026,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
 ; UNROLL-NEXT:    [[TMP27:%.*]] = load float, ptr [[TMP21]], align 4
 ; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
 ; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1
-; UNROLL-NEXT:    [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
-; UNROLL-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
+; UNROLL-NEXT:    [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00)
+; UNROLL-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00)
 ; UNROLL-NEXT:    [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]]
 ; UNROLL-NEXT:    [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]]
 ; UNROLL-NEXT:    [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]]
@@ -1107,8 +1107,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load float, ptr [[TMP22]], align 4
 ; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1
-; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
-; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
+; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], splat (float 1.000000e+00)
+; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], splat (float 1.000000e+00)
 ; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]]
 ; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]]
 ; UNROLL-NO-IC-NEXT:    [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]]
@@ -1174,8 +1174,8 @@ define float @scalarize_induction_variable_02(ptr %a, ptr %b, i64 %n) {
 ; INTERLEAVE-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
 ; INTERLEAVE-NEXT:    [[WIDE_VEC6:%.*]] = load <32 x float>, ptr [[TMP9]], align 4
 ; INTERLEAVE-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC6]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
-; INTERLEAVE-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
-; INTERLEAVE-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; INTERLEAVE-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_PHI]], splat (float 1.000000e+00)
+; INTERLEAVE-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], splat (float 1.000000e+00)
 ; INTERLEAVE-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[STRIDED_VEC]]
 ; INTERLEAVE-NEXT:    [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[STRIDED_VEC3]]
 ; INTERLEAVE-NEXT:    [[TMP14]] = fadd fast <4 x float> [[TMP12]], [[STRIDED_VEC5]]
@@ -1574,7 +1574,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
@@ -1586,7 +1586,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; CHECK-NEXT:    store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
 ; CHECK-NEXT:    store i32 [[TMP17]], ptr [[TMP19]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1637,7 +1637,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1
-; IND-NEXT:    [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; IND-NEXT:    [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
 ; IND-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0
 ; IND-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
 ; IND-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1
@@ -1649,7 +1649,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; IND-NEXT:    store i32 [[TMP15]], ptr [[TMP17]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
 ; IND-NEXT:    store i32 [[TMP16]], ptr [[TMP18]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; IND-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; IND:       middle.block:
@@ -1702,9 +1702,9 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; UNROLL-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 1
 ; UNROLL-NEXT:    [[TMP10:%.*]] = or disjoint i64 [[INDEX]], 2
 ; UNROLL-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 3
-; UNROLL-NEXT:    [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; UNROLL-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8>
+; UNROLL-NEXT:    [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], splat (i64 2)
+; UNROLL-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 8)
 ; UNROLL-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0
 ; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
 ; UNROLL-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1
@@ -1726,7 +1726,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; UNROLL-NEXT:    store i32 [[TMP24]], ptr [[TMP28]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; UNROLL-NEXT:    store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; UNROLL-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -1777,13 +1777,13 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 2
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 3
-; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shl nsw <2 x i64> [[VEC_IND]], splat (i64 2)
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP15]]
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
@@ -1805,7 +1805,7 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
 ; UNROLL-NO-IC-NEXT:    store i32 [[TMP25]], ptr [[TMP29]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; UNROLL-NO-IC-NEXT:    store i32 [[TMP26]], ptr [[TMP30]], align 1, !alias.scope [[META20]], !noalias [[META17]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -2465,7 +2465,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
 ; CHECK-NEXT:    store i16 [[TMP10]], ptr [[TMP8]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
@@ -2513,7 +2513,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; IND-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1
 ; IND-NEXT:    store i16 [[TMP9]], ptr [[TMP7]], align 2
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IND:       middle.block:
@@ -2551,7 +2551,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1
 ; UNROLL-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2
 ; UNROLL-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3
@@ -2572,7 +2572,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL-NEXT:    [[TMP17:%.*]] = extractelement <2 x i16> [[TMP9]], i64 1
 ; UNROLL-NEXT:    store i16 [[TMP17]], ptr [[TMP13]], align 2
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -2611,7 +2611,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 2
@@ -2633,7 +2633,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1
 ; UNROLL-NO-IC-NEXT:    store i16 [[TMP18]], ptr [[TMP14]], align 2
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -2671,7 +2671,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[INDEX]], 1
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 2
 ; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 3
@@ -2708,7 +2708,7 @@ define void @iv_vector_and_scalar_users(ptr %p, i32 %a, i32 %n) {
 ; INTERLEAVE-NEXT:    [[TMP29:%.*]] = extractelement <4 x i16> [[TMP13]], i64 3
 ; INTERLEAVE-NEXT:    store i16 [[TMP29]], ptr [[TMP21]], align 2
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -2765,7 +2765,7 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
@@ -2833,9 +2833,9 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
-; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
+; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
@@ -2902,7 +2902,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
@@ -2970,9 +2970,9 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
-; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
+; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
@@ -3043,7 +3043,7 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
+; CHECK-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
@@ -3111,9 +3111,9 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
-; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[TMP0]] = and <2 x i32> [[VEC_PHI]], splat (i32 4)
+; UNROLL-NO-IC-NEXT:    [[TMP1]] = and <2 x i32> [[VEC_PHI1]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
@@ -3194,7 +3194,7 @@ define i32 @testoverflowcheck() {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3323,14 +3323,14 @@ define i32 @testoverflowcheck() {
 ; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[DOTCAST]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> splat (i32 -1), i32 [[C_PROMOTED_I]], i32 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP5]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
 ; UNROLL-NO-IC-NEXT:    [[TMP6]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI1]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -3465,7 +3465,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; CHECK:       middle.block:
@@ -3530,7 +3530,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; IND-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; IND:       middle.block:
@@ -3590,7 +3590,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
@@ -3599,7 +3599,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -3663,7 +3663,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i8 [[OFFSET_IDX]], 0
@@ -3673,7 +3673,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -3732,7 +3732,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
@@ -3741,7 +3741,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -3845,7 +3845,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; CHECK:       middle.block:
@@ -3913,7 +3913,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; IND-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
 ; IND-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; IND:       middle.block:
@@ -3976,7 +3976,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
 ; UNROLL-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
@@ -3985,7 +3985,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 16, i32 16>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 16)
 ; UNROLL-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -4052,7 +4052,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
 ; UNROLL-NO-IC-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add i8 [[OFFSET_IDX]], 0
@@ -4062,7 +4062,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP15]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 8, i32 8>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 8)
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -4124,7 +4124,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 16)
 ; INTERLEAVE-NEXT:    [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]]
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext i8 [[OFFSET_IDX]] to i64
@@ -4133,7 +4133,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) {
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP11]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP12]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 32, i32 32, i32 32, i32 32>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 32)
 ; INTERLEAVE-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -4211,7 +4211,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; CHECK:       middle.block:
@@ -4244,7 +4244,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; IND-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; IND:       middle.block:
@@ -4274,14 +4274,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -4312,7 +4312,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -4320,7 +4320,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -4349,14 +4349,14 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -4416,7 +4416,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; CHECK:       middle.block:
@@ -4454,7 +4454,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; IND-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; IND:       middle.block:
@@ -4489,7 +4489,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
 ; UNROLL-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
@@ -4497,7 +4497,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -4537,7 +4537,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP6]]
@@ -4546,7 +4546,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP8]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -4579,7 +4579,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 30
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
@@ -4587,7 +4587,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -4650,7 +4650,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
 ; CHECK:       middle.block:
@@ -4689,7 +4689,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; IND-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
 ; IND:       middle.block:
@@ -4724,7 +4724,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
 ; UNROLL-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
 ; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
@@ -4732,7 +4732,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -4768,7 +4768,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP1]]
@@ -4777,7 +4777,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP3]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -4811,7 +4811,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]]
@@ -4819,7 +4819,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) {
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -4873,7 +4873,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
 ; CHECK:       middle.block:
@@ -4911,7 +4911,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; IND-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; IND-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; IND-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; IND-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
 ; IND:       middle.block:
@@ -4946,13 +4946,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
 ; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 8, i32 8>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 8)
 ; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -4988,7 +4988,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; UNROLL-NO-IC:       vector.body:
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
@@ -4996,7 +4996,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 4, i32 4>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 4)
 ; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -5031,13 +5031,13 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) {
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], ptr [[TMP0]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], ptr [[TMP1]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 16, i32 16, i32 16, i32 16>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 16)
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; INTERLEAVE-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -5121,7 +5121,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
 ; CHECK:       middle.block:
@@ -5185,7 +5185,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; IND-NEXT:    [[TMP13:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
 ; IND-NEXT:    [[TMP14]] = or <2 x i32> [[VEC_PHI]], [[TMP13]]
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
 ; IND-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
 ; IND-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
 ; IND:       middle.block:
@@ -5216,7 +5216,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ]
 ; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
 ; UNROLL-NEXT:    [[TMP2:%.*]] = icmp ne <2 x i16> [[VEC_IND]], zeroinitializer
-; UNROLL-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], <i16 -2, i16 -2>
+; UNROLL-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i16> [[VEC_IND]], splat (i16 -2)
 ; UNROLL-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
 ; UNROLL-NEXT:    br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
 ; UNROLL:       pred.urem.if:
@@ -5260,7 +5260,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; UNROLL-NEXT:    [[TMP26]] = or <2 x i32> [[VEC_PHI]], [[TMP24]]
 ; UNROLL-NEXT:    [[TMP27]] = or <2 x i32> [[VEC_PHI1]], [[TMP25]]
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], <i16 4, i16 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 4)
 ; UNROLL-NEXT:    [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
 ; UNROLL-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -5290,7 +5290,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE7]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE7]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE7]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], <i16 2, i16 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
 ; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]]
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND]], zeroinitializer
@@ -5340,7 +5340,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; UNROLL-NO-IC-NEXT:    [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]]
 ; UNROLL-NO-IC-NEXT:    [[TMP29]] = or <2 x i32> [[VEC_PHI1]], [[TMP27]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], <i16 2, i16 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[STEP_ADD]], splat (i16 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -5384,7 +5384,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -20, i16 -19, i16 -18, i16 -17>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE15]] ]
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = icmp ne <4 x i16> [[VEC_IND]], zeroinitializer
-; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], <i16 -4, i16 -4, i16 -4, i16 -4>
+; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp ne <4 x i16> [[VEC_IND]], splat (i16 -4)
 ; INTERLEAVE-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
 ; INTERLEAVE-NEXT:    br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
 ; INTERLEAVE:       pred.urem.if:
@@ -5464,7 +5464,7 @@ define i32 @PR32419(i32 %a, i16 %b) {
 ; INTERLEAVE-NEXT:    [[TMP46]] = or <4 x i32> [[VEC_PHI]], [[TMP44]]
 ; INTERLEAVE-NEXT:    [[TMP47]] = or <4 x i32> [[VEC_PHI1]], [[TMP45]]
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 8, i16 8, i16 8, i16 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 8)
 ; INTERLEAVE-NEXT:    [[TMP48:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; INTERLEAVE-NEXT:    br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -5541,19 +5541,19 @@ define i64 @trunc_with_first_order_recurrence() {
 ; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42)
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
 ; CHECK:       middle.block:
@@ -5605,19 +5605,19 @@ define i64 @trunc_with_first_order_recurrence() {
 ; IND-NEXT:    [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
 ; IND-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
 ; IND-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND2]]
-; IND-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
+; IND-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], splat (i32 42)
 ; IND-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND2]]
 ; IND-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
 ; IND-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
 ; IND-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
-; IND-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], <i32 1, i32 1>
+; IND-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND4]], splat (i32 1)
 ; IND-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
 ; IND-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
 ; IND-NEXT:    [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; IND-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
-; IND-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; IND-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], splat (i32 2)
+; IND-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], splat (i32 2)
 ; IND-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
 ; IND-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
 ; IND:       middle.block:
@@ -5666,14 +5666,14 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND5:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; UNROLL-NEXT:    [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; UNROLL-NEXT:    [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]]
 ; UNROLL-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]]
-; UNROLL-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
-; UNROLL-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
+; UNROLL-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42)
+; UNROLL-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42)
 ; UNROLL-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]]
 ; UNROLL-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]]
 ; UNROLL-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
@@ -5682,9 +5682,9 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NEXT:    [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
 ; UNROLL-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
 ; UNROLL-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
-; UNROLL-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], <i32 1, i32 1>
-; UNROLL-NEXT:    [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], <i32 1, i32 1>
-; UNROLL-NEXT:    [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
+; UNROLL-NEXT:    [[STEP_ADD8:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
+; UNROLL-NEXT:    [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD8]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
 ; UNROLL-NEXT:    [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
 ; UNROLL-NEXT:    [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
@@ -5692,9 +5692,9 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NEXT:    [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
 ; UNROLL-NEXT:    [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
-; UNROLL-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4>
-; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
+; UNROLL-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i32> [[VEC_IND3]], splat (i32 4)
+; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND5]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
 ; UNROLL-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -5744,15 +5744,15 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND5:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD7]] = add <2 x i32> [[VEC_IND3]], splat (i32 2)
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD8:%.*]] = add <2 x i32> [[VEC_IND5]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD7]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[VEC_IND3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD]], [[STEP_ADD7]]
-; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
-; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
+; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], splat (i32 42)
+; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], splat (i32 42)
 ; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND3]]
 ; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD7]]
 ; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
@@ -5761,8 +5761,8 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
 ; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
-; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], <i32 1, i32 1>
-; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], <i32 1, i32 1>
+; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND5]], splat (i32 1)
+; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD8]], splat (i32 1)
 ; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
 ; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
@@ -5770,9 +5770,9 @@ define i64 @trunc_with_first_order_recurrence() {
 ; UNROLL-NO-IC-NEXT:    [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
 ; UNROLL-NO-IC-NEXT:    [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], <i32 2, i32 2>
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i32> [[STEP_ADD7]], splat (i32 2)
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD8]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -5824,14 +5824,14 @@ define i64 @trunc_with_first_order_recurrence() {
 ; INTERLEAVE-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD7:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND5:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
-; INTERLEAVE-NEXT:    [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; INTERLEAVE-NEXT:    [[STEP_ADD7]] = add <4 x i32> [[VEC_IND3]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND]], [[VEC_IND3]]
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD]], [[STEP_ADD7]]
-; INTERLEAVE-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP2]], <i32 42, i32 42, i32 42, i32 42>
-; INTERLEAVE-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], <i32 42, i32 42, i32 42, i32 42>
+; INTERLEAVE-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP2]], splat (i32 42)
+; INTERLEAVE-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], splat (i32 42)
 ; INTERLEAVE-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND3]]
 ; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD7]]
 ; INTERLEAVE-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]]
@@ -5840,9 +5840,9 @@ define i64 @trunc_with_first_order_recurrence() {
 ; INTERLEAVE-NEXT:    [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
 ; INTERLEAVE-NEXT:    [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]]
 ; INTERLEAVE-NEXT:    [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]]
-; INTERLEAVE-NEXT:    [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], <i32 1, i32 1, i32 1, i32 1>
-; INTERLEAVE-NEXT:    [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], <i32 1, i32 1, i32 1, i32 1>
-; INTERLEAVE-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1)
+; INTERLEAVE-NEXT:    [[STEP_ADD8:%.*]] = shl <4 x i32> [[VEC_IND5]], splat (i32 1)
+; INTERLEAVE-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD8]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]]
 ; INTERLEAVE-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]]
 ; INTERLEAVE-NEXT:    [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64>
@@ -5850,9 +5850,9 @@ define i64 @trunc_with_first_order_recurrence() {
 ; INTERLEAVE-NEXT:    [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]]
 ; INTERLEAVE-NEXT:    [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]]
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8>
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT4]] = add <4 x i32> [[VEC_IND3]], splat (i32 8)
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND5]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
 ; INTERLEAVE-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -5944,7 +5944,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
 ; CHECK:       middle.block:
@@ -5992,7 +5992,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; IND-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP2]]
 ; IND-NEXT:    store <2 x i32> [[TMP5]], ptr [[TMP4]], align 4
 ; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; IND-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; IND-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
 ; IND:       middle.block:
@@ -6013,7 +6013,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
 ; UNROLL-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4
@@ -6030,7 +6030,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NEXT:    store <2 x i32> [[TMP7]], ptr [[TMP6]], align 4
 ; UNROLL-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP9]], align 4
 ; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
+; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4)
 ; UNROLL-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; UNROLL-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
 ; UNROLL:       middle.block:
@@ -6051,7 +6051,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = trunc i64 [[INDEX]] to i32
 ; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 0
 ; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
@@ -6069,7 +6069,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP8]], ptr [[TMP10]], align 4
 ; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP9]], ptr [[TMP11]], align 4
 ; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
 ; UNROLL-NO-IC:       middle.block:
@@ -6106,7 +6106,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; INTERLEAVE-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
 ; INTERLEAVE-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SRC:%.*]], align 4
@@ -6123,7 +6123,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr %
 ; INTERLEAVE-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP6]], align 4
 ; INTERLEAVE-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP9]], align 4
 ; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
+; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
 ; INTERLEAVE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
 ; INTERLEAVE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
 ; INTERLEAVE:       middle.block:
@@ -6435,7 +6435,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n
 ; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i32 [[DOTCAST]], [[STEP]]
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul <2 x i32> <i32 2, i32 2>, [[BROADCAST_SPLAT]]
+; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul <2 x i32> splat (i32 2), [[BROADCAST_SPLAT]]
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
 ; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
diff --git a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
index 553fc374e0fdf2..318ccdb8afa160 100644
--- a/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/instruction-only-used-outside-of-loop.ll
@@ -14,7 +14,7 @@ define i32 @one_direct_branch(ptr %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -72,7 +72,7 @@ define i32 @two_direct_branch(ptr %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -140,12 +140,12 @@ define i32 @cond_branch(i32 %a, ptr %src) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> <i32 25500, i32 25500, i32 25500, i32 25500>, [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i32> splat (i32 25500), [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32> [[TMP3]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 10), <4 x i32> [[TMP3]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -210,7 +210,7 @@ define i32 @optimizable_trunc_used_outside() {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index 2878786cb972a5..c90ae64de3f8f3 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -112,10 +112,10 @@ define void @test_struct_array_load3_store3() {
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
 ; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <12 x i32> [[WIDE_VEC]], <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
-; CHECK-NEXT:    [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1024 x %struct.ST3], ptr @S, i64 0, i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <4 x i32> [[STRIDED_VEC2]], splat (i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[STRIDED_VEC3]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
@@ -267,11 +267,11 @@ define void @test_struct_store4(ptr noalias nocapture readonly %A, ptr noalias n
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP0:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ST4:%.*]], ptr [[B:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 3)
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
@@ -362,7 +362,7 @@ define void @test_reversed_load2_store2(ptr noalias nocapture readonly %A, ptr n
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
 ; CHECK-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -421,7 +421,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[INDEX]], 9223372036854775804
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4
@@ -497,7 +497,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = shl nsw <4 x i32> [[STRIDED_VEC]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and i64 [[INDEX]], 9223372036854775804
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP7]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
@@ -612,7 +612,7 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture %
 ; CHECK-NEXT:    [[TMP28:%.*]] = extractelement <4 x i64> [[TMP20]], i64 3
 ; CHECK-NEXT:    store i64 [[TMP28]], ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -746,7 +746,7 @@ define void @mixed_load3_store3(ptr nocapture %A) {
 ; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> [[TMP4]], <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
 ; CHECK-NEXT:    store <12 x i32> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1172,7 +1172,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[INDEX]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
@@ -1196,7 +1196,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) {
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
 ; CHECK-NEXT:    [[TMP17]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1366,8 +1366,8 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or disjoint i64 [[TMP4]], 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = or disjoint i64 [[TMP4]], 5
 ; CHECK-NEXT:    [[TMP6:%.*]] = or disjoint i64 [[TMP4]], 7
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -1, i64 -1, i64 -1, i64 -1>
-; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], <i64 -3, i64 -3, i64 -3, i64 -3>
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
+; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3)
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP5]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP6]]
@@ -1402,7 +1402,7 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) {
 ; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    store i32 [[Z]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
index 63381454cc5900..1473d292d06ac9 100644
--- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -390,14 +390,14 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[GEP]], align 4, !alias.scope [[META23:![0-9]+]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP17]] = add <4 x i32> [[TMP16]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP17]] = add <4 x i32> [[TMP16]], splat (i32 1)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[DOTLCSSA]])
-; CHECK-NEXT:    store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    store i32 [[TMP19]], ptr [[ARRAYIDX5]], align 4, !alias.scope [[META27:![0-9]+]], !noalias [[META23]]
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC8_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -415,7 +415,7 @@ define i32 @multiple_uniform_stores(ptr nocapture %var1, ptr nocapture readonly
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[ITR]], [[LFTR_WIDEIV]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_INC8_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY3]], !llvm.loop [[LOOP29:![0-9]+]]
 ; CHECK:       for.inc8.loopexit.loopexit:
 ; CHECK-NEXT:    br label [[FOR_INC8_LOOPEXIT]]
 ; CHECK:       for.inc8.loopexit:
diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll
index 80d705cbf9b9f8..12bd43a1d5f9aa 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-form.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll
@@ -77,7 +77,7 @@ define void @bottom_tested(ptr %p, i32 %n) {
 ; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; TAILFOLD:       pred.store.continue2:
 ; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
-; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; TAILFOLD-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; TAILFOLD:       middle.block:
@@ -199,7 +199,7 @@ define i32 @early_exit_with_live_out(ptr %ptr) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <2 x i32> <i32 10, i32 10>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    store <2 x i32> splat (i32 10), ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -847,8 +847,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) {
 ; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
 ; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
-; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
-; CHECK-NEXT:    i32 2097, label [[IF_END]]
+; CHECK-NEXT:      i32 2096, label [[IF_END:%.*]]
+; CHECK-NEXT:      i32 2097, label [[IF_END]]
 ; CHECK-NEXT:    ]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
@@ -864,8 +864,8 @@ define i32 @multiple_exit_switch(ptr %p, i32 %n) {
 ; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
 ; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
 ; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
-; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
-; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
+; TAILFOLD-NEXT:      i32 2096, label [[IF_END:%.*]]
+; TAILFOLD-NEXT:      i32 2097, label [[IF_END]]
 ; TAILFOLD-NEXT:    ]
 ; TAILFOLD:       if.end:
 ; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
@@ -902,8 +902,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) {
 ; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
 ; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
-; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
-; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
+; CHECK-NEXT:      i32 2096, label [[IF_END:%.*]]
+; CHECK-NEXT:      i32 2097, label [[IF_END2:%.*]]
 ; CHECK-NEXT:    ]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 0
@@ -920,8 +920,8 @@ define i32 @multiple_exit_switch2(ptr %p, i32 %n) {
 ; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
 ; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
 ; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
-; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
-; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
+; TAILFOLD-NEXT:      i32 2096, label [[IF_END:%.*]]
+; TAILFOLD-NEXT:      i32 2097, label [[IF_END2:%.*]]
 ; TAILFOLD-NEXT:    ]
 ; TAILFOLD:       if.end:
 ; TAILFOLD-NEXT:    ret i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
index de298d20fc382c..0bffac24227571 100644
--- a/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ b/llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -21,7 +21,7 @@ define void @vector_gep(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
index 0515e6f07bf015..c3d8a75acaaaf7 100644
--- a/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
+++ b/llvm/test/Transforms/LoopVectorize/memdep-fold-tail.ll
@@ -30,7 +30,7 @@ define void @maxvf3() {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], <i32 14, i32 14>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 14)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -47,7 +47,7 @@ define void @maxvf3() {
 ; CHECK-NEXT:    store i8 69, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
 ; CHECK:       pred.store.continue2:
-; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw <2 x i32> <i32 3, i32 3>, [[VEC_IND]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw <2 x i32> splat (i32 3), [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
 ; CHECK:       pred.store.if3:
@@ -65,7 +65,7 @@ define void @maxvf3() {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
index 80348f9e46130f..ca0edb3e1a46d6 100644
--- a/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
+++ b/llvm/test/Transforms/LoopVectorize/multiple-address-spaces.ll
@@ -26,7 +26,7 @@ define i32 @main() #0 {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [40000 x i8], ptr addrspace(1) @Y, i64 0, i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr addrspace(1) [[TMP0]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [40000 x i8], ptr @X, i64 0, i64 [[INDEX]]
 ; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
index d462d3aa650d28..7a3f9df6ec2821 100644
--- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
+++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
@@ -45,7 +45,7 @@ define i32 @test(ptr %arr, i64 %n) {
 ; CHECK-NEXT:    [[TMP17:%.*]] = add nsw i64 [[TMP13]], -1
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 65, i32 65, i32 65, i32 65>, ptr [[TMP19]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 65), ptr [[TMP19]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index 26a2fb3806d3e7..305906d9d1ef1d 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -189,7 +189,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP16]], ptr [[TMP18]], align 4
@@ -223,7 +223,7 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
 ; CHECK-NEXT:    [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
 ; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0
 ; CHECK-NEXT:    store <4 x float> [[TMP28]], ptr [[TMP30]], align 4
@@ -334,7 +334,7 @@ define void @f3(ptr noalias %A, i64 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <4 x i8> <i8 1, i8 1, i8 1, i8 1>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -355,7 +355,7 @@ define void @f3(ptr noalias %A, i64 %n) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX5]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i8> <i8 1, i8 1, i8 1, i8 1>, ptr [[TMP6]], align 1
+; CHECK-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -393,7 +393,7 @@ define void @f3(ptr noalias %A, i64 %n) {
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> <i8 1, i8 1, i8 1, i8 1>, ptr [[TMP2]], align 1
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -414,7 +414,7 @@ define void @f3(ptr noalias %A, i64 %n) {
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX5]], 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> <i8 1, i8 1>, ptr [[TMP6]], align 1
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> splat (i8 1), ptr [[TMP6]], align 1
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 2
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -685,7 +685,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -714,7 +714,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x i8> [[TMP7]], ptr [[TMP9]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
@@ -756,7 +756,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK-PROFITABLE-BY-DEFAULT:       middle.block:
@@ -785,7 +785,7 @@ define void @f4(ptr noalias %A, i32 signext %n) {
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
-; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>
+; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2)
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
index b6f2ae3c14893a..632986930b1fd0 100644
--- a/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
+++ b/llvm/test/Transforms/LoopVectorize/outer-loop-vec-phi-predecessor-order.ll
@@ -29,20 +29,20 @@ define void @test(ptr %src, i64 %n) {
 ; CHECK-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i64> [ zeroinitializer, [[LOOP_2_HEADER1]] ], [ [[TMP2:%.*]], [[LOOP_32]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [2000 x i32], ptr [[SRC:%.*]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI3]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> poison)
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 10)
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2]] = add nuw nsw <4 x i64> [[VEC_PHI3]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[LOOP_2_LATCH4]], label [[LOOP_32]]
 ; CHECK:       loop.2.latch4:
-; CHECK-NEXT:    [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP_1_LATCH5]], label [[LOOP_2_HEADER1]]
 ; CHECK:       vector.latch:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -73,7 +73,7 @@ define void @test(ptr %src, i64 %n) {
 ; CHECK:       loop.1.latch:
 ; CHECK-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
 ; CHECK-NEXT:    [[EC_1:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC_1]], label [[EXIT]], label [[LOOP_1_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/phi-cost.ll b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
index 6ee2f36dd5b415..9f67d01f460ce4 100644
--- a/llvm/test/Transforms/LoopVectorize/phi-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/phi-cost.ll
@@ -102,11 +102,11 @@ define void @phi_three_incoming_values(ptr noalias %a, ptr noalias %b, i64 %n) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], <i32 20, i32 20>
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], <i32 4, i32 4>
-; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> <i32 4, i32 4>, <2 x i32> <i32 5, i32 5>
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> <i32 3, i32 3>
-; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> <i32 9, i32 9>
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD2]], splat (i32 4)
+; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 4), <2 x i32> splat (i32 5)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i32> [[TMP8]], <2 x i32> splat (i32 3)
+; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[PREDPHI]], <2 x i32> splat (i32 9)
 ; CHECK-NEXT:    store <2 x i32> [[PREDPHI3]], ptr [[TMP3]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 41d9c4d84202c6..c5a215f1efba66 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -152,7 +152,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
-; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i8> [[WIDE_LOAD]], splat (i8 1)
 ; CHECK-NEXT:    store <4 x i8> [[TMP6]], ptr [[TMP5]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4
diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
index f1075e31688cb3..ae23c824baca98 100644
--- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll
@@ -37,7 +37,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    br label [[PRED_SREM_CONTINUE2]]
 ; CHECK:       pred.srem.continue2:
 ; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> <i16 5786, i16 5786>, <2 x i16> [[TMP10]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> splat (i16 5786), <2 x i16> [[TMP10]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
 ; CHECK-NEXT:    store i16 [[TMP11]], ptr @v_39, align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
@@ -64,7 +64,7 @@ define i16 @test_true_and_false_branch_equal() {
 ; CHECK-NEXT:    store i16 [[COND6]], ptr @v_39, align 1
 ; CHECK-NEXT:    [[INC7]] = add nsw i16 [[I_07]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i16 [[INC7]], 111
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[RV:%.*]] = load i16, ptr @v_39, align 1
 ; CHECK-NEXT:    ret i16 [[RV]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45259.ll b/llvm/test/Transforms/LoopVectorize/pr45259.ll
index 008971697775e4..42bd14e4b752bc 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45259.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45259.ll
@@ -41,7 +41,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP11:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i8> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARR]], i8 [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt <4 x i8> [[TMP11]], [[BROADCAST_SPLAT]]
@@ -49,7 +49,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
 ; CHECK-NEXT:    store <4 x i8> [[TMP15]], ptr [[TMP16]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -66,7 +66,7 @@ define i8 @widget(ptr %arr, i8 %t9) {
 ; CHECK-NEXT:    [[T3_I8:%.*]] = zext i1 [[T3_I]] to i8
 ; CHECK-NEXT:    store i8 [[T3_I8]], ptr [[PTR]], align 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq ptr [[T1_0_LCSSA]], [[PTR]]
-; CHECK-NEXT:    br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label [[FOR_EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       for.exit:
 ; CHECK-NEXT:    [[IV_NEXT_LCSSA:%.*]] = phi i8 [ [[IV_NEXT]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i8 [[IV_NEXT_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
index dc3480fbb11a88..ee6975fa0c9e6c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll
@@ -19,7 +19,7 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], <i32 13, i32 13, i32 13, i32 13>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -53,7 +53,7 @@ define void @pr45679(ptr %A) optsize {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -79,9 +79,9 @@ define void @pr45679(ptr %A) optsize {
 ; VF2UF2:       vector.body:
 ; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
 ; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
-; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
-; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], <i32 13, i32 13>
-; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], <i32 13, i32 13>
+; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
+; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13)
+; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13)
 ; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
@@ -115,7 +115,7 @@ define void @pr45679(ptr %A) optsize {
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.continue6:
 ; VF2UF2-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
+; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; VF2UF2-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
 ; VF2UF2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; VF2UF2:       middle.block:
@@ -214,7 +214,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 13, i64 13, i64 13, i64 13>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -252,7 +252,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
 ; CHECK:       pred.store.continue6:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -279,9 +279,9 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2:       vector.body:
 ; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
 ; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
-; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], <i64 13, i64 13>
-; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], <i64 13, i64 13>
+; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13)
+; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13)
 ; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
 ; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; VF2UF2:       pred.store.if:
@@ -319,7 +319,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) {
 ; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
 ; VF2UF2:       pred.store.continue6:
 ; VF2UF2-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
+; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
 ; VF2UF2-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
 ; VF2UF2-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VF2UF2:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
index 5f2298514be50e..d7f5f646f1873c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr51614-fold-tail-by-masking.ll
@@ -22,7 +22,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], <i32 40, i32 40>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IV]], splat (i32 40)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -77,7 +77,7 @@ define dso_local i16 @reverse_interleave_load_fold_mask() optsize {
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i16 [[TMP29]], [[TMP30]]
 ; CHECK-NEXT:    [[PREVSUM]] = add nsw i16 [[SUM]], [[ADD]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[IV]], 1
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[PREVSUM_LCSSA:%.*]] = phi i16 [ [[PREVSUM]], [[LOOP]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i16 [[PREVSUM_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
index bc7e3c6c6e2510..a8c065bcff0348 100644
--- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll
@@ -8,7 +8,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0
@@ -19,18 +19,18 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 6, i32 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 35902, i32 0>, [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT2]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], <i32 10, i32 10>
+; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[TMP0]], <i32 20, i32 20>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20)
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP4]], <2 x i1> [[BROADCAST_SPLAT4]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> <i32 9, i32 9>, <2 x i32> [[VEC_IND]]
-; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> <i32 9, i32 9>, <2 x i32> [[PREDPHI]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> splat (i32 9), <2 x i32> [[VEC_IND]]
+; CHECK-NEXT:    [[PREDPHI5:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> splat (i32 9), <2 x i32> [[PREDPHI]]
 ; CHECK-NEXT:    [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[PREDPHI7]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP3]], <2 x i32> [[PREDPHI6]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -58,7 +58,7 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 {
 ; CHECK-NEXT:    [[P_2]] = phi i32 [ [[V_2]], [[LOOP_HEADER]] ], [ [[V_2_ADD]], [[BODY_1]] ], [ [[ADD_2]], [[BODY_2]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[EC:%.*]] = icmp ult i32 [[IV]], 181
-; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    [[E_1:%.*]] = phi i32 [ [[P_1]], [[LOOP_LATCH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[E_2:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll
index 2fb7f88e5341e5..c60b216f86fa8d 100644
--- a/llvm/test/Transforms/LoopVectorize/pr66616.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll
@@ -15,7 +15,7 @@ define void @pr66616(ptr %ptr) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index e18b3d7477a16d..2b65eca647b652 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -24,8 +24,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC1-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP2]]
 ; IC1-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
 ; IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP3]], align 1
-; IC1-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 -12, i8 -12>
-; IC1-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 13, i8 13>
+; IC1-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
+; IC1-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
 ; IC1-NEXT:    [[TMP11:%.*]] = or <2 x i1> [[TMP7]], [[TMP4]]
 ; IC1-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP11]], <i1 true, i1 true>
 ; IC1-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
@@ -124,10 +124,10 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
 ; IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP5]], align 1
 ; IC2-NEXT:    [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
-; IC2-NEXT:    [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 -12, i8 -12>
-; IC2-NEXT:    [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], <i8 -12, i8 -12>
-; IC2-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], <i8 13, i8 13>
-; IC2-NEXT:    [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], <i8 13, i8 13>
+; IC2-NEXT:    [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
+; IC2-NEXT:    [[TMP14:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 -12)
+; IC2-NEXT:    [[TMP7:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 13)
+; IC2-NEXT:    [[TMP8:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD6]], splat (i8 13)
 ; IC2-NEXT:    [[TMP21:%.*]] = or <2 x i1> [[TMP13]], [[TMP7]]
 ; IC2-NEXT:    [[TMP22:%.*]] = or <2 x i1> [[TMP14]], [[TMP8]]
 ; IC2-NEXT:    [[TMP19:%.*]] = xor <2 x i1> [[TMP21]], <i1 true, i1 true>
@@ -340,21 +340,21 @@ define void @switch_to_header(ptr %start) {
 ; IC1-NEXT:  [[ENTRY:.*]]:
 ; IC1-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; IC1:       [[LOOP_HEADER]]:
-; IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ]
+; IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
 ; IC1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC1-NEXT:    switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC1-NEXT:      i64 120, label %[[LOOP_HEADER_BACKEDGE]]
+; IC1-NEXT:      i64 120, label %[[IF_THEN]]
 ; IC1-NEXT:      i64 100, label %[[LOOP_LATCH]]
 ; IC1-NEXT:    ]
-; IC1:       [[LOOP_HEADER_BACKEDGE]]:
+; IC1:       [[IF_THEN]]:
 ; IC1-NEXT:    br label %[[LOOP_HEADER]]
-; IC1:       [[IF_THEN:.*:]]
+; IC1:       [[IF_THEN1:.*:]]
 ; IC1-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC1-NEXT:    store i64 42, ptr [[GEP]], align 1
 ; IC1-NEXT:    unreachable
 ; IC1:       [[LOOP_LATCH]]:
 ; IC1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC1-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]]
+; IC1-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
 ; IC1:       [[EXIT]]:
 ; IC1-NEXT:    ret void
 ;
@@ -363,21 +363,21 @@ define void @switch_to_header(ptr %start) {
 ; IC2-NEXT:  [[ENTRY:.*]]:
 ; IC2-NEXT:    br label %[[LOOP_HEADER:.*]]
 ; IC2:       [[LOOP_HEADER]]:
-; IC2-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_HEADER_BACKEDGE:.*]] ]
+; IC2-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
 ; IC2-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC2-NEXT:    switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC2-NEXT:      i64 120, label %[[LOOP_HEADER_BACKEDGE]]
+; IC2-NEXT:      i64 120, label %[[IF_THEN]]
 ; IC2-NEXT:      i64 100, label %[[LOOP_LATCH]]
 ; IC2-NEXT:    ]
-; IC2:       [[LOOP_HEADER_BACKEDGE]]:
+; IC2:       [[IF_THEN]]:
 ; IC2-NEXT:    br label %[[LOOP_HEADER]]
-; IC2:       [[IF_THEN:.*:]]
+; IC2:       [[IF_THEN1:.*:]]
 ; IC2-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC2-NEXT:    store i64 42, ptr [[GEP]], align 1
 ; IC2-NEXT:    unreachable
 ; IC2:       [[LOOP_LATCH]]:
 ; IC2-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC2-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP_HEADER_BACKEDGE]]
+; IC2-NEXT:    br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
 ; IC2:       [[EXIT]]:
 ; IC2-NEXT:    ret void
 ;
@@ -417,7 +417,7 @@ define void @switch_all_to_default(ptr %start) {
 ; IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]]
 ; IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
-; IC1-NEXT:    store <2 x i64> <i64 42, i64 42>, ptr [[TMP2]], align 1
+; IC1-NEXT:    store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1
 ; IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; IC1-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; IC1-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -453,8 +453,8 @@ define void @switch_all_to_default(ptr %start) {
 ; IC2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[TMP0]]
 ; IC2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; IC2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; IC2-NEXT:    store <2 x i64> <i64 42, i64 42>, ptr [[TMP4]], align 1
-; IC2-NEXT:    store <2 x i64> <i64 42, i64 42>, ptr [[TMP5]], align 1
+; IC2-NEXT:    store <2 x i64> splat (i64 42), ptr [[TMP4]], align 1
+; IC2-NEXT:    store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1
 ; IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; IC2-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; IC2-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
index 66509ffedd68ba..2bf02b0b906f7e 100644
--- a/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
+++ b/llvm/test/Transforms/LoopVectorize/preserve-or-disjoint.ll
@@ -18,8 +18,8 @@ define void @generate_disjoint_flags(i64 %n, ptr noalias %x) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[WIDE_LOAD]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i32> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
index 567eea8e2aeb82..240421341626ab 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll
@@ -15,7 +15,7 @@ define float @cond_fadd(ptr noalias nocapture readonly %a, ptr noalias nocapture
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 5.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -129,7 +129,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -169,7 +169,7 @@ define float @cond_cmp_sel(ptr noalias %a, ptr noalias %cond, i64 %N) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>
+; CHECK-NEXT:    [[TMP25:%.*]] = select fast <4 x i1> [[TMP1]], <4 x float> [[TMP24]], <4 x float> splat (float 0x47EFFFFFE0000000)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP25]])
 ; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = call fast float @llvm.minnum.f32(float [[TMP26]], float [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -287,7 +287,7 @@ define i32 @conditional_and(ptr noalias %A, ptr noalias %B, i32 %cond, i64 nound
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP24]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP26:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP25]])
 ; CHECK-NEXT:    [[TMP27]] = and i32 [[TMP26]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -549,7 +549,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x i64> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], <i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[TMP25:%.*]] = icmp eq <4 x i64> [[TMP24]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP26:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP25]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
@@ -588,7 +588,7 @@ define i64 @nested_cond_and(ptr noalias nocapture readonly %a, ptr noalias nocap
 ; CHECK:       pred.load.continue14:
 ; CHECK-NEXT:    [[TMP46:%.*]] = phi <4 x i64> [ [[TMP41]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP45]], [[PRED_LOAD_IF13]] ]
 ; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP26]], <4 x i64> [[TMP46]], <4 x i64> [[TMP24]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x i64> [[PREDPHI_V]], <4 x i64> splat (i64 -1)
 ; CHECK-NEXT:    [[PREDPHI15]] = and <4 x i64> [[VEC_PHI]], [[PREDPHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -811,7 +811,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
 ; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[COND]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -850,7 +850,7 @@ define float @cond_cond(ptr noalias %src1, ptr noalias %src2, ptr noalias %cond,
 ; CHECK-NEXT:    [[TMP24:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP23]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP25:%.*]] = fadd fast <4 x float> [[TMP24]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[TMP25]], <4 x float> [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
+; CHECK-NEXT:    [[TMP26:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], splat (float 7.000000e+00)
 ; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP26]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
 ; CHECK:       pred.load.if7:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll
index 1dfd8d93d2cf1f..3b07a76440e857 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-min-max.ll
@@ -104,7 +104,7 @@ define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
@@ -246,7 +246,7 @@ define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
index 89349314f27029..5d9b08384dda52 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll
@@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -57,7 +57,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -100,7 +100,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -166,8 +166,8 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -214,7 +214,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -257,11 +257,11 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
 ; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -305,7 +305,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -361,18 +361,18 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK:       pred.load.continue8:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]]
-; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
 ; CHECK-NEXT:    [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -420,7 +420,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
 ; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -484,8 +484,8 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
 ; CHECK-NEXT:    [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -532,7 +532,7 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -588,14 +588,14 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = mul i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -640,7 +640,7 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -696,14 +696,14 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = and i32 [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -748,7 +748,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -809,7 +809,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -854,7 +854,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -915,7 +915,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]])
 ; CHECK-NEXT:    [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -960,7 +960,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1021,7 +1021,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1066,7 +1066,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1122,14 +1122,14 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]])
 ; CHECK-NEXT:    [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
 ; CHECK-NEXT:    [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1174,7 +1174,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1214,11 +1214,11 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
 ; CHECK:       pred.load.continue6:
 ; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
-; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647)
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1261,7 +1261,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1305,7 +1305,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
 ; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1354,9 +1354,9 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
@@ -1440,8 +1440,8 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1488,7 +1488,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1536,7 +1536,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -1583,7 +1583,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
index 99a0b708035884..f65f62bbf54bf2 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll
@@ -79,13 +79,13 @@ define i32 @predicated(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP104:%.*]], [[PRED_LOAD_CONTINUE36]] ]
 ; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[PRED_LOAD_CONTINUE36]] ]
 ; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP110:%.*]], [[PRED_LOAD_CONTINUE36]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], <i64 257, i64 257, i64 257, i64 257>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD1]], <i64 257, i64 257, i64 257, i64 257>
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD2]], <i64 257, i64 257, i64 257, i64 257>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <4 x i64> [[STEP_ADD]], splat (i64 257)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i64> [[STEP_ADD_2]], splat (i64 257)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[STEP_ADD_3]], splat (i64 257)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -258,7 +258,7 @@ define i32 @predicated(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP109:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP108]])
 ; CHECK-NEXT:    [[TMP110]] = add i32 [[TMP109]], [[VEC_PHI6]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
 ; CHECK-NEXT:    [[TMP111:%.*]] = icmp eq i64 [[INDEX_NEXT]], 272
 ; CHECK-NEXT:    br i1 [[TMP111]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -302,7 +302,7 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16
 ; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], splat (i32 7)
 ; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
@@ -314,9 +314,9 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
 ; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP112:%.*]], [[PRED_LOAD_CONTINUE38]] ]
 ; CHECK-NEXT:    [[VEC_PHI5:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP115:%.*]], [[PRED_LOAD_CONTINUE38]] ]
 ; CHECK-NEXT:    [[VEC_PHI6:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[PRED_LOAD_CONTINUE38]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD1:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; CHECK-NEXT:    [[STEP_ADD2:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]]
@@ -484,20 +484,20 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) {
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE38]]
 ; CHECK:       pred.load.continue35:
 ; CHECK-NEXT:    [[TMP106:%.*]] = phi <4 x i32> [ [[TMP100]], [[PRED_LOAD_CONTINUE36]] ], [ [[TMP105]], [[PRED_LOAD_IF37]] ]
-; CHECK-NEXT:    [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP107:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP34]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP108:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP107]])
 ; CHECK-NEXT:    [[TMP109]] = mul i32 [[TMP108]], [[VEC_PHI]]
-; CHECK-NEXT:    [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP110:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP58]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP111:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP110]])
 ; CHECK-NEXT:    [[TMP112]] = mul i32 [[TMP111]], [[VEC_PHI4]]
-; CHECK-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP113:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP82]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP114:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP113]])
 ; CHECK-NEXT:    [[TMP115]] = mul i32 [[TMP114]], [[VEC_PHI5]]
-; CHECK-NEXT:    [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP116:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP106]], <4 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP117:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP116]])
 ; CHECK-NEXT:    [[TMP118]] = mul i32 [[TMP117]], [[VEC_PHI6]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 16)
 ; CHECK-NEXT:    [[TMP119:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP119]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index 7c5d6a1edf0b4b..e23b4801653c1f 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -69,7 +69,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -174,7 +174,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP7]] = mul i32 [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -231,7 +231,7 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
 ; CHECK-NEXT:    [[TMP6]] = add i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -689,9 +689,9 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
@@ -832,7 +832,7 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
 ; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -926,7 +926,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ <i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ splat (i8 -1), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
@@ -1122,9 +1122,9 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], <i8 31, i8 31, i8 31, i8 31>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], <i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], <i8 31, i8 31, i8 31, i8 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31)
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3)
+; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31)
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]]
@@ -1207,9 +1207,9 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], <i8 31, i8 31, i8 31, i8 31>
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], <i8 3, i8 3, i8 3, i8 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], <i8 31, i8 31, i8 31, i8 31>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31)
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3)
+; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31)
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
index 7cc58edbcb408d..491a9ec0f0d4da 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll
@@ -13,7 +13,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -56,7 +56,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -98,7 +98,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -159,7 +159,7 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -204,8 +204,8 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -265,7 +265,7 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -309,8 +309,8 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -367,10 +367,10 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
 ; CHECK-NEXT:    [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]]
-; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1)
 ; CHECK-NEXT:    [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -415,7 +415,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -475,7 +475,7 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -520,7 +520,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -580,7 +580,7 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -625,7 +625,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -685,7 +685,7 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -730,7 +730,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -790,7 +790,7 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]]
 ; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -834,8 +834,8 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -878,7 +878,7 @@ define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -920,8 +920,8 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
@@ -964,7 +964,7 @@ define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
 ; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
 ; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
index 81364f1fd6aa8e..8a8439fca439d0 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll
@@ -17,7 +17,7 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP4]] = zext <4 x i8> [[TMP3]] to <4 x i32>
@@ -93,10 +93,10 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> <i32 99, i32 99, i32 99, i32 99>, [[TMP0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]]
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP6]] = zext <4 x i8> [[TMP5]] to <4 x i32>
@@ -175,8 +175,8 @@ define i32 @PR35734(i32 %x, i32 %y) {
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP6]] = sext <4 x i1> [[TMP5]] to <4 x i32>
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/reduction.ll b/llvm/test/Transforms/LoopVectorize/reduction.ll
index d574ec720e92a5..a80833de28697e 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction.ll
@@ -25,7 +25,7 @@ define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -96,7 +96,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
@@ -106,7 +106,7 @@ define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP7]] = mul <4 x i32> [[TMP6]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -187,7 +187,7 @@ define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP7]] = add <4 x i32> [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -268,7 +268,7 @@ define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[TMP7]] = mul <4 x i32> [[TMP6]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -416,7 +416,7 @@ define i32 @reduction_and(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
@@ -760,9 +760,9 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
 ; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
@@ -1059,7 +1059,7 @@ define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
index c1322792071e45..156f075f9c0c01 100644
--- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -261,11 +261,11 @@ define void @reverse_forward_induction_i64_i8() {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]]
@@ -278,7 +278,7 @@ define void @reverse_forward_induction_i64_i8() {
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -330,11 +330,11 @@ define void @reverse_forward_induction_i64_i8_signed() {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 -127, i8 -126, i8 -125, i8 -124>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], <i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1)
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]]
@@ -347,7 +347,7 @@ define void @reverse_forward_induction_i64_i8_signed() {
 ; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], <i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
index e7e63e55802fe1..bb515cd583e5bc 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-small-clamped-bounds.ll
@@ -36,7 +36,7 @@ define void @load_clamped_index(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
@@ -109,7 +109,7 @@ define void @store_clamped_index(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP4]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
@@ -190,7 +190,7 @@ define void @load_clamped_index_offset_1(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
-; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP10]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP14]], ptr [[TMP16]], align 4
@@ -375,7 +375,7 @@ define void @clamped_index_equal_dependence(ptr %A, ptr %B, i32 %N) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
index 9521c0933fe876..ccf02b96abc9d2 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll
@@ -32,7 +32,7 @@ define i32 @foo(ptr nocapture %a, ptr nocapture %b, i32 %n) nounwind uwtable ssp
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], !dbg [[DBG9]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]], !dbg [[DBG9]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !dbg [[DBG9]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>, !dbg [[DBG9]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00), !dbg [[DBG9]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]], !dbg [[DBG9]]
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP4]], align 4, !dbg [[DBG9]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4, !dbg [[DBG9]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
index 07e7b462e7bf98..da8e2add18a674 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
@@ -1460,7 +1460,7 @@ define void @nested_loop_start_of_inner_ptr_addrec_is_same_outer_addrec(ptr noca
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll
index beac46d2eb9d97..0eddc913702720 100644
--- a/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalarize-masked-call.ll
@@ -16,7 +16,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_CALL_CONTINUE2:%.*]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], <i64 5, i64 5>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], splat (i64 5)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_CALL_IF:%.*]], label [[PRED_CALL_CONTINUE:%.*]]
 ; CHECK:       pred.call.if:
@@ -62,7 +62,7 @@ define void @cond_call(ptr readonly %src, ptr noalias %dest, i64 %N) {
 ; CHECK-NEXT:    store i64 [[ST_VALUE]], ptr [[ST_ADDR]], align 8
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
index 43eeefb7744900..37a998cb7e9387 100644
--- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
+++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll
@@ -43,7 +43,7 @@ define void @step_direction_unknown(i32 %arg, ptr %dst) {
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP16]], align 8
 ; CHECK-NEXT:    store double 0.000000e+00, ptr [[TMP18]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll
index f747993c54fc99..f3e69b41f76379 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll
@@ -11,22 +11,22 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NOT:%*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
-; CHECK-NEXT:    [[FR_TMP6:%.*]] = freeze i1 [[TMP6]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP7]]
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 undef
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
@@ -74,22 +74,22 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
-; CHECK-NEXT:    [[FR_TMP6:%.*]] = freeze i1 [[TMP6]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP7]]
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 poison
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
@@ -137,22 +137,22 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) {
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1>
-; CHECK-NEXT:    [[NOT:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP5]] = or <2 x i1> [[VEC_PHI]], [[TMP4]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
-; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
-; CHECK-NEXT:    [[FR_TMP6:%.*]] = freeze i1 [[TMP6]]
-; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP5]])
+; CHECK-NEXT:    [[TMP8:%.*]] = freeze i1 [[TMP7]]
+; CHECK-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[A]], i64 [[START]]
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
index 087a0aa429b7ed..b7fc1e83ce23f4 100644
--- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll
@@ -29,8 +29,8 @@ define i32 @test(i64 %N, i32 %x) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3>
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT:    [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], splat (i32 10)
+; CHECK-NEXT:    [[TMP2]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_PHI]], <4 x i32> splat (i32 10)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP2]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -91,7 +91,7 @@ define i32 @pr66895_tail_fold_reduction_exit_inst_gets_simplified(i32 %n) {
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], <i32 12, i32 12, i32 12, i32 12>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 12)
 ; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_PHI]], <4 x i32> [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 6407583061e601..96d168106729e6 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -24,12 +24,12 @@ define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> splat (i16 1), <2 x i16> [[WIDE_LOAD]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <2 x i16> [[PREDPHI]], ptr [[TMP7]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -113,12 +113,12 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
+; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0
 ; CHECK-NEXT:    store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -208,9 +208,9 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
 ; CHECK-NEXT:    store <2 x i16> [[TMP9]], ptr [[TMP11]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
-; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], <i16 2, i16 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
+; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2)
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -305,12 +305,12 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]]
-; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> <i16 1, i16 1>, <2 x i16> [[PREDPHI]]
+; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
 ; CHECK-NEXT:    store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
 ; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -387,7 +387,7 @@ define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
 ; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
index 54bbf3f22ec011..9efc8d12f44a39 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-alloca-in-loop.ll
@@ -11,7 +11,7 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 200, i64 200, i64 200, i64 200>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 200)
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
 ; CHECK:       [[PRED_STORE_IF]]:
@@ -49,11 +49,11 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 204
 ; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
-; CHECK-NEXT:    br i1 true, label %[[exit:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
 ; CHECK:       [[SCALAR_PH]]:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 204, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
@@ -64,8 +64,8 @@ define i32 @test(ptr %vf1, i64 %n) {
 ; CHECK-NEXT:    store ptr [[TMP18]], ptr [[ARRAYIDX]], align 8
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV]], 200
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[exit]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK:       [[exit]]:
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
index e4572a2b0a065a..dd2fd0de9938ef 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-switch.ll
@@ -14,8 +14,8 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE6]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
@@ -50,7 +50,7 @@ define void @tail_fold_switch(ptr %dst, i32 %0) {
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
 ; CHECK:       [[PRED_STORE_CONTINUE6]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 8
 ; CHECK-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
index c63dc9979bce3c..427e030471b095 100644
--- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
+++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll
@@ -25,7 +25,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_value_in_header(ptr %dst, i64 %
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -91,7 +91,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_constant_in_header(ptr %dst, i6
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -158,7 +158,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_value_in_block_executed_uncondi
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -270,7 +270,7 @@ define i64 @multi_exit_2_exit_count_with_udiv_by_constant_in_block_executed_unco
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 10)
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
 ; CHECK:       pred.store.if:
@@ -481,7 +481,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -548,7 +548,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds
 ; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP9]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP9]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -617,7 +617,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst,
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP8]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -685,7 +685,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_constant_in_latch(ptr %dst, i64
 ; CHECK-NEXT:    [[IV:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP16:![0-9]+]]
@@ -747,7 +747,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
@@ -809,7 +809,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N
 ; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP8]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -876,7 +876,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64
 ; CHECK-NEXT:    [[IV:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[IV]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[GEP]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]]
@@ -1025,7 +1025,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 %
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
index 048d18ea8422d0..b5bef4aacff3cf 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll
@@ -17,7 +17,7 @@ define i32 @test_icmp_constant_op_zext(ptr %dst) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i16 [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT:    store <4 x i8> <i8 109, i8 109, i8 109, i8 109>, ptr [[TMP2]], align 1
+; CHECK-NEXT:    store <4 x i8> splat (i8 109), ptr [[TMP2]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
index cae6ece9f9d8c9..14608d5068ed33 100644
--- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
+++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll
@@ -4,24 +4,24 @@
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) {
-; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops
-; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) {
+; CHECK-LABEL: define void @test_pr47927_lshr_const_shift_ops(
+; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18)
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
-; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -64,24 +64,24 @@ exit:
 }
 
 define void @test_shl_const_shift_ops(ptr %dst, i32 %f) {
-; CHECK-LABEL: define void @test_shl_const_shift_ops
-; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) {
+; CHECK-LABEL: define void @test_shl_const_shift_ops(
+; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
+; CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18)
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
-; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -124,24 +124,24 @@ exit:
 }
 
 define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) {
-; CHECK-LABEL: define void @test_ashr_const_shift_ops
-; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) {
+; CHECK-LABEL: define void @test_ashr_const_shift_ops(
+; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], <i32 18, i32 18, i32 18, i32 18>
-; CHECK-NEXT:    [[TMP2:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i8>
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18)
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8>
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8
-; CHECK-NEXT:    [[TMP0:%.*]] = add i8 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <4 x i8> [[TMP2]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -184,8 +184,8 @@ exit:
 }
 
 define void @test_shl_const_shifted_op(ptr %dst, i32 %f) {
-; CHECK-LABEL: define void @test_shl_const_shifted_op
-; CHECK-SAME: (ptr [[DST:%.*]], i32 [[F:%.*]]) {
+; CHECK-LABEL: define void @test_shl_const_shifted_op(
+; CHECK-SAME: ptr [[DST:%.*]], i32 [[F:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -199,7 +199,7 @@ define void @test_shl_const_shifted_op(ptr %dst, i32 %f) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> <i32 19, i32 19, i32 19, i32 19>, [[TMP4]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i32> splat (i32 19), [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8>
 ; CHECK-NEXT:    store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -249,8 +249,8 @@ exit:
 
 
 define void @test_lshr_by_18(ptr %A) {
-; CHECK-LABEL: define void @test_lshr_by_18
-; CHECK-SAME: (ptr [[A:%.*]]) {
+; CHECK-LABEL: define void @test_lshr_by_18(
+; CHECK-SAME: ptr [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -264,7 +264,7 @@ define void @test_lshr_by_18(ptr %A) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 18, i32 18, i32 18, i32 18>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 18)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8>
 ; CHECK-NEXT:    store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
@@ -313,8 +313,8 @@ exit:
 }
 
 define void @test_lshr_by_4(ptr %A) {
-; CHECK-LABEL: define void @test_lshr_by_4
-; CHECK-SAME: (ptr [[A:%.*]]) {
+; CHECK-LABEL: define void @test_lshr_by_4(
+; CHECK-SAME: ptr [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -328,12 +328,12 @@ define void @test_lshr_by_4(ptr %A) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], <i16 4, i16 4, i16 4, i16 4>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i16> [[TMP4]], splat (i16 4)
 ; CHECK-NEXT:    [[TMP6:%.*]] = trunc <4 x i16> [[TMP5]] to <4 x i8>
 ; CHECK-NEXT:    store <4 x i8> [[TMP6]], ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -375,19 +375,3 @@ loop:
 exit:
   ret void
 }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
-; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
-; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
-; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
-;.
diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
index c9fc8beb006d9b..85011d5f446c1d 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll
@@ -153,7 +153,7 @@ define void @blend_chain_iv(i1 %c) {
 ; CHECK-NEXT:    store i16 0, ptr [[TMP6]], align 2
 ; CHECK-NEXT:    store i16 0, ptr [[TMP8]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
index 4cee3e3cb6832e..5e8b60b910aed0 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll
@@ -18,7 +18,7 @@ define void @ld_div1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -78,7 +78,7 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -133,7 +133,7 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -142,12 +142,12 @@ define void @ld_div3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -200,7 +200,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -209,7 +209,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -217,7 +217,7 @@ define void @ld_div1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -273,7 +273,7 @@ define void @ld_div2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
@@ -333,7 +333,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -342,7 +342,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -350,7 +350,7 @@ define void @ld_div3_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -403,7 +403,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -412,7 +412,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -420,7 +420,7 @@ define void @ld_div1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -473,7 +473,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -482,7 +482,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -490,7 +490,7 @@ define void @ld_div2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -546,7 +546,7 @@ define void @ld_div3_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
@@ -608,7 +608,7 @@ define void @ld_div1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -664,7 +664,7 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -673,12 +673,12 @@ define void @ld_div2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
@@ -730,7 +730,7 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -739,12 +739,12 @@ define void @ld_div3_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK:       middle.block:
@@ -798,7 +798,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -807,7 +807,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -815,7 +815,7 @@ define void @ld_div1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
@@ -872,7 +872,7 @@ define void @ld_div2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
@@ -933,7 +933,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -942,7 +942,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -950,7 +950,7 @@ define void @ld_div3_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1004,7 +1004,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -1013,7 +1013,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -1021,7 +1021,7 @@ define void @ld_div1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1075,7 +1075,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -1084,7 +1084,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -1092,7 +1092,7 @@ define void @ld_div2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; CHECK:       middle.block:
@@ -1149,7 +1149,7 @@ define void @ld_div3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
@@ -1203,14 +1203,14 @@ define void @test_step_is_not_invariant(ptr %A) {
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
+; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i16 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = mul nuw nsw <2 x i32> [[VEC_IND]], [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i32> [[TMP3]] to <2 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], <i16 6, i16 6>
+; CHECK-NEXT:    [[TMP5:%.*]] = udiv <2 x i16> [[TMP4]], splat (i16 6)
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP7]]
@@ -1218,8 +1218,8 @@ define void @test_step_is_not_invariant(ptr %A) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP9]]
 ; CHECK-NEXT:    store i16 [[TMP1]], ptr [[TMP8]], align 2
 ; CHECK-NEXT:    store i16 [[TMP2]], ptr [[TMP10]], align 2
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 56
 ; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
index 7f8b33e97360cc..4200ff96689384 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll
@@ -18,7 +18,7 @@ define void @ld_and_neg1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -78,7 +78,7 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -133,7 +133,7 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -3, i64 -3>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -142,12 +142,12 @@ define void @ld_and_neg3_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -200,7 +200,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -209,7 +209,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -217,7 +217,7 @@ define void @ld_and_neg1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
@@ -274,7 +274,7 @@ define void @ld_and_neg2_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
@@ -334,7 +334,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -343,7 +343,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -351,7 +351,7 @@ define void @ld_and_neg1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
@@ -404,7 +404,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -413,7 +413,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -421,7 +421,7 @@ define void @ld_and_neg2_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
@@ -473,7 +473,7 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -482,12 +482,12 @@ define void @ld_and_neg2_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; CHECK-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
@@ -541,7 +541,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -550,7 +550,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -558,7 +558,7 @@ define void @ld_and_neg2_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK:       middle.block:
@@ -612,7 +612,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -2, i64 -2>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -621,7 +621,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -629,7 +629,7 @@ define void @ld_and_neg2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK:       middle.block:
@@ -683,7 +683,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], <i64 -3, i64 -3>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[VEC_IND]], splat (i64 -3)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -692,7 +692,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -700,7 +700,7 @@ define void @ld_and_neg3_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; CHECK-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
index 098e29eb691661..c5a8e749997548 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll
@@ -16,8 +16,8 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEXT:    [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = urem <8 x i64> [[TMP1]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i32 1
@@ -50,12 +50,12 @@ define void @ld_div2_urem3_1(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 5
 ; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 6
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 7
-; CHECK-NEXT:    [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[TMP35:%.*]] = add nsw <8 x i64> [[TMP34]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[TMP36]], i32 0
 ; CHECK-NEXT:    store <8 x i64> [[TMP35]], ptr [[TMP37]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -108,9 +108,9 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
-; CHECK-NEXT:    [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEXT:    [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], <i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <8 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT:    [[TMP2:%.*]] = udiv <8 x i64> [[TMP1]], splat (i64 2)
+; CHECK-NEXT:    [[TMP3:%.*]] = urem <8 x i64> [[TMP2]], splat (i64 3)
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x i64> [[TMP3]], i32 1
@@ -143,12 +143,12 @@ define void @ld_div2_urem3_2(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 5
 ; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <8 x i64> [[TMP33]], i64 [[TMP26]], i32 6
 ; CHECK-NEXT:    [[TMP35:%.*]] = insertelement <8 x i64> [[TMP34]], i64 [[TMP27]], i32 7
-; CHECK-NEXT:    [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[TMP36:%.*]] = add nsw <8 x i64> [[TMP35]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i64, ptr [[TMP37]], i32 0
 ; CHECK-NEXT:    store <8 x i64> [[TMP36]], ptr [[TMP38]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
@@ -201,7 +201,7 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <8 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i64> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x i64> [[TMP1]], i32 1
@@ -234,12 +234,12 @@ define void @ld_div4(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <8 x i64> [[TMP30]], i64 [[TMP23]], i32 5
 ; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <8 x i64> [[TMP31]], i64 [[TMP24]], i32 6
 ; CHECK-NEXT:    [[TMP33:%.*]] = insertelement <8 x i64> [[TMP32]], i64 [[TMP25]], i32 7
-; CHECK-NEXT:    [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[TMP34:%.*]] = add nsw <8 x i64> [[TMP33]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i64, ptr [[TMP35]], i32 0
 ; CHECK-NEXT:    store <8 x i64> [[TMP34]], ptr [[TMP36]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
 ; CHECK-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
@@ -296,7 +296,7 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
 ; CHECK-NEXT:    store <8 x i64> [[TMP5]], ptr [[TMP7]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
index f79772915b0248..17b94495b517ce 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll
@@ -19,7 +19,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; VF2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8
-; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -58,7 +58,7 @@ define void @ld_lshr0_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
 ; VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
-; VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -118,7 +118,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 ; VF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; VF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -154,7 +154,7 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
 ; VF4-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
 ; VF4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1
@@ -171,12 +171,12 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1
 ; VF4-NEXT:    [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3
-; VF4-NEXT:    [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42)
 ; VF4-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; VF4-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF4-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VF4:       middle.block:
@@ -232,7 +232,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 ; VF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0
 ; VF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -272,7 +272,7 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8
 ; VF4-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0
 ; VF4-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP6]], align 8
@@ -338,7 +338,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; VF2-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; VF2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -346,7 +346,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; VF2-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
 ; VF2-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF2-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF2:       middle.block:
@@ -399,7 +399,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -413,7 +413,7 @@ define void @ld_lshr0_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
 ; VF4-NEXT:    store i64 [[TMP29]], ptr [[TMP25]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
 ; VF4-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF4-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF4:       middle.block:
@@ -469,7 +469,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
 ; VF2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; VF2-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
@@ -515,7 +515,7 @@ define void @ld_lshr1_step2_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
 ; VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8
-; VF4-NEXT:    [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP7:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -590,7 +590,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; VF2-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; VF2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -598,7 +598,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; VF2-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; VF2-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VF2:       middle.block:
@@ -651,7 +651,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -665,7 +665,7 @@ define void @ld_lshr0_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
 ; VF4-NEXT:    store i64 [[TMP29]], ptr [[TMP25]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; VF4-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VF4:       middle.block:
@@ -718,7 +718,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
 ; VF2-NEXT:    [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
 ; VF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
@@ -727,7 +727,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP8:%.*]] = load i64, ptr [[TMP6]], align 8
 ; VF2-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP7]], i32 0
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP8]], i32 1
-; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP11:%.*]] = add nsw <2 x i64> [[TMP10]], splat (i64 42)
 ; VF2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP11]], i32 0
@@ -735,7 +735,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP11]], i32 1
 ; VF2-NEXT:    store i64 [[TMP15]], ptr [[TMP13]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; VF2-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VF2:       middle.block:
@@ -771,7 +771,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
 ; VF4-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
 ; VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP4]], i32 1
@@ -788,7 +788,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 1
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 2
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 3
-; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP21:%.*]] = add nsw <4 x i64> [[TMP20]], splat (i64 42)
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -802,7 +802,7 @@ define void @ld_lshr1_step3_start0_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP29:%.*]] = extractelement <4 x i64> [[TMP21]], i32 3
 ; VF4-NEXT:    store i64 [[TMP29]], ptr [[TMP25]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; VF4-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VF4:       middle.block:
@@ -855,7 +855,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
 ; VF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
 ; VF2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
@@ -864,12 +864,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP7:%.*]] = load i64, ptr [[TMP5]], align 8
 ; VF2-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0
 ; VF2-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> [[TMP8]], i64 [[TMP7]], i32 1
-; VF2-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP10:%.*]] = add nsw <2 x i64> [[TMP9]], splat (i64 42)
 ; VF2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP10]], ptr [[TMP12]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; VF2-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; VF2-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; VF2:       middle.block:
@@ -902,7 +902,7 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 1, i64 2, i64 3, i64 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
 ; VF4-NEXT:    [[TMP2:%.*]] = extractelement <4 x i64> [[TMP1]], i32 0
 ; VF4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP1]], i32 1
@@ -919,12 +919,12 @@ define void @ld_lshr1_step1_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP15:%.*]] = insertelement <4 x i64> [[TMP14]], i64 [[TMP11]], i32 1
 ; VF4-NEXT:    [[TMP16:%.*]] = insertelement <4 x i64> [[TMP15]], i64 [[TMP12]], i32 2
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 3
-; VF4-NEXT:    [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP18:%.*]] = add nsw <4 x i64> [[TMP17]], splat (i64 42)
 ; VF4-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[TMP19]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP18]], ptr [[TMP20]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; VF4-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; VF4-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; VF4:       middle.block:
@@ -981,7 +981,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
 ; VF2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; VF2-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
@@ -1028,7 +1028,7 @@ define void @ld_lshr1_step2_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
 ; VF4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8
-; VF4-NEXT:    [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP8:%.*]] = add nsw <4 x i64> [[WIDE_LOAD]], splat (i64 42)
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -1095,7 +1095,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 1)
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -1104,7 +1104,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -1112,7 +1112,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; VF2-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; VF2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; VF2:       middle.block:
@@ -1149,7 +1149,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 1)
 ; VF4-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
 ; VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
@@ -1166,7 +1166,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
-; VF4-NEXT:    [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
 ; VF4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -1180,7 +1180,7 @@ define void @ld_lshr1_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
 ; VF4-NEXT:    store i64 [[TMP30]], ptr [[TMP26]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; VF4-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; VF4:       middle.block:
@@ -1234,7 +1234,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[VEC_IND]], splat (i64 2)
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
@@ -1243,7 +1243,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP15:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
@@ -1251,7 +1251,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
 ; VF2-NEXT:    store i64 [[TMP16]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
 ; VF2-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; VF2:       middle.block:
@@ -1288,7 +1288,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[VEC_IND]], splat (i64 2)
 ; VF4-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0
 ; VF4-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP5]], i32 1
@@ -1305,7 +1305,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 1
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 2
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
-; VF4-NEXT:    [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP22:%.*]] = add nsw <4 x i64> [[TMP21]], splat (i64 42)
 ; VF4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -1319,7 +1319,7 @@ define void @ld_lshr2_step3_start1_ind1(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP30:%.*]] = extractelement <4 x i64> [[TMP22]], i32 3
 ; VF4-NEXT:    store i64 [[TMP30]], ptr [[TMP26]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
 ; VF4-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; VF4:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
index 098835afa4480a..4bc5aee381bc9a 100644
--- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll
@@ -15,8 +15,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -26,13 +26,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF2-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; VF2:       middle.block:
@@ -70,8 +70,8 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -89,13 +89,13 @@ define void @ld_div1_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; VF4:       middle.block:
@@ -163,7 +163,7 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
 ; VF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i64 0
 ; VF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; VF2-NEXT:    [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP7:%.*]] = add nsw <2 x i64> [[BROADCAST_SPLAT]], splat (i64 42)
 ; VF2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP9]], align 8
@@ -205,8 +205,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -224,13 +224,13 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; VF4:       middle.block:
@@ -291,8 +291,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -302,13 +302,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF2-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VF2:       middle.block:
@@ -346,8 +346,8 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -365,13 +365,13 @@ define void @ld_div3_step1_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; VF4:       middle.block:
@@ -434,8 +434,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -445,7 +445,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -453,8 +453,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF2:       middle.block:
@@ -496,8 +496,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -515,7 +515,7 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -529,8 +529,8 @@ define void @ld_div1_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; VF4:       middle.block:
@@ -593,8 +593,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -604,7 +604,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -612,8 +612,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VF2:       middle.block:
@@ -655,8 +655,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -674,7 +674,7 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -688,8 +688,8 @@ define void @ld_div2_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; VF4:       middle.block:
@@ -752,8 +752,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -763,7 +763,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -771,8 +771,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VF2:       middle.block:
@@ -814,8 +814,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -833,7 +833,7 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -847,8 +847,8 @@ define void @ld_div3_step2_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 500
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; VF4:       middle.block:
@@ -911,8 +911,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -922,7 +922,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -930,8 +930,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VF2:       middle.block:
@@ -973,8 +973,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -992,7 +992,7 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -1006,8 +1006,8 @@ define void @ld_div1_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; VF4:       middle.block:
@@ -1070,8 +1070,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -1081,7 +1081,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -1089,8 +1089,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; VF2:       middle.block:
@@ -1132,8 +1132,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -1151,7 +1151,7 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -1165,8 +1165,8 @@ define void @ld_div2_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; VF4:       middle.block:
@@ -1229,8 +1229,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[TMP3]]
 ; VF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
 ; VF2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP5]]
@@ -1240,7 +1240,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP8]], align 8
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> poison, i64 [[TMP9]], i32 0
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> [[TMP11]], i64 [[TMP10]], i32 1
-; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP13:%.*]] = add nsw <2 x i64> [[TMP12]], splat (i64 42)
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
@@ -1248,8 +1248,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
 ; VF2-NEXT:    store i64 [[TMP17]], ptr [[TMP15]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; VF2:       middle.block:
@@ -1291,8 +1291,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP4:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP6:%.*]] = add <4 x i64> [[TMP4]], [[TMP5]]
 ; VF4-NEXT:    [[TMP7:%.*]] = extractelement <4 x i64> [[TMP6]], i32 0
 ; VF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
@@ -1310,7 +1310,7 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP20:%.*]] = insertelement <4 x i64> [[TMP19]], i64 [[TMP16]], i32 1
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 2
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 3
-; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP23:%.*]] = add nsw <4 x i64> [[TMP22]], splat (i64 42)
 ; VF4-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
@@ -1324,8 +1324,8 @@ define void @ld_div3_step3_start0_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP31:%.*]] = extractelement <4 x i64> [[TMP23]], i32 3
 ; VF4-NEXT:    store i64 [[TMP31]], ptr [[TMP27]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; VF4:       middle.block:
@@ -1387,8 +1387,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1398,13 +1398,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; VF2-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; VF2:       middle.block:
@@ -1443,8 +1443,8 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1462,13 +1462,13 @@ define void @ld_div1_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; VF4:       middle.block:
@@ -1530,8 +1530,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1541,13 +1541,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; VF2-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; VF2:       middle.block:
@@ -1586,8 +1586,8 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1605,13 +1605,13 @@ define void @ld_div2_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; VF4:       middle.block:
@@ -1673,8 +1673,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF2-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP1:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP2:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
 ; VF2-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
 ; VF2-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1684,13 +1684,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP7]], align 8
 ; VF2-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> poison, i64 [[TMP8]], i32 0
 ; VF2-NEXT:    [[TMP11:%.*]] = insertelement <2 x i64> [[TMP10]], i64 [[TMP9]], i32 1
-; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP12:%.*]] = add nsw <2 x i64> [[TMP11]], splat (i64 42)
 ; VF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0
 ; VF2-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP14]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
 ; VF2-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; VF2:       middle.block:
@@ -1729,8 +1729,8 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
 ; VF4-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; VF4-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP1:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP2:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP1]], [[TMP2]]
 ; VF4-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; VF4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]]
@@ -1748,13 +1748,13 @@ define void @ld_div3_step1_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP17:%.*]] = insertelement <4 x i64> [[TMP16]], i64 [[TMP13]], i32 1
 ; VF4-NEXT:    [[TMP18:%.*]] = insertelement <4 x i64> [[TMP17]], i64 [[TMP14]], i32 2
 ; VF4-NEXT:    [[TMP19:%.*]] = insertelement <4 x i64> [[TMP18]], i64 [[TMP15]], i32 3
-; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP20:%.*]] = add nsw <4 x i64> [[TMP19]], splat (i64 42)
 ; VF4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP0]]
 ; VF4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i32 0
 ; VF4-NEXT:    store <4 x i64> [[TMP20]], ptr [[TMP22]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
 ; VF4-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; VF4:       middle.block:
@@ -1818,8 +1818,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -1829,7 +1829,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -1837,8 +1837,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; VF2:       middle.block:
@@ -1881,8 +1881,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -1900,7 +1900,7 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -1914,8 +1914,8 @@ define void @ld_div1_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; VF4:       middle.block:
@@ -1979,8 +1979,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -1990,7 +1990,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -1998,8 +1998,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
 ; VF2:       middle.block:
@@ -2042,8 +2042,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -2061,7 +2061,7 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -2075,8 +2075,8 @@ define void @ld_div2_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
 ; VF4:       middle.block:
@@ -2140,8 +2140,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -2151,7 +2151,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -2159,8 +2159,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 4)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 498
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; VF2:       middle.block:
@@ -2203,8 +2203,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 2
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 4
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 6
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -2222,7 +2222,7 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -2236,8 +2236,8 @@ define void @ld_div3_step2_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 496
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; VF4:       middle.block:
@@ -2301,8 +2301,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 1, i64 1>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 1, i64 1>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 1)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -2312,7 +2312,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -2320,8 +2320,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; VF2:       middle.block:
@@ -2364,8 +2364,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 1, i64 1, i64 1, i64 1>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 1)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 1)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -2383,7 +2383,7 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -2397,8 +2397,8 @@ define void @ld_div1_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
 ; VF4:       middle.block:
@@ -2462,8 +2462,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 2)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -2473,7 +2473,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -2481,8 +2481,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; VF2:       middle.block:
@@ -2525,8 +2525,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 2, i64 2, i64 2, i64 2>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 2)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -2544,7 +2544,7 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -2558,8 +2558,8 @@ define void @ld_div2_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
 ; VF4:       middle.block:
@@ -2623,8 +2623,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[TMP0]]
 ; VF2-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
 ; VF2-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
-; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], <i64 3, i64 3>
-; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], <i64 3, i64 3>
+; VF2-NEXT:    [[TMP3:%.*]] = udiv <2 x i64> [[VEC_IND]], splat (i64 3)
+; VF2-NEXT:    [[TMP4:%.*]] = udiv <2 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF2-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
 ; VF2-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
 ; VF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP6]]
@@ -2634,7 +2634,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP11:%.*]] = load i64, ptr [[TMP9]], align 8
 ; VF2-NEXT:    [[TMP12:%.*]] = insertelement <2 x i64> poison, i64 [[TMP10]], i32 0
 ; VF2-NEXT:    [[TMP13:%.*]] = insertelement <2 x i64> [[TMP12]], i64 [[TMP11]], i32 1
-; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], <i64 42, i64 42>
+; VF2-NEXT:    [[TMP14:%.*]] = add nsw <2 x i64> [[TMP13]], splat (i64 42)
 ; VF2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i64> [[TMP14]], i32 0
@@ -2642,8 +2642,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF2-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP14]], i32 1
 ; VF2-NEXT:    store i64 [[TMP18]], ptr [[TMP16]], align 8
 ; VF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 6, i64 6>
-; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], <i64 2, i64 2>
+; VF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 6)
+; VF2-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i64> [[VEC_IND2]], splat (i64 2)
 ; VF2-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF2-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; VF2:       middle.block:
@@ -2686,8 +2686,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 3
 ; VF4-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6
 ; VF4-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 9
-; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], <i64 3, i64 3, i64 3, i64 3>
-; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], <i64 3, i64 3, i64 3, i64 3>
+; VF4-NEXT:    [[TMP5:%.*]] = udiv <4 x i64> [[VEC_IND]], splat (i64 3)
+; VF4-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[VEC_IND2]], splat (i64 3)
 ; VF4-NEXT:    [[TMP7:%.*]] = add <4 x i64> [[TMP5]], [[TMP6]]
 ; VF4-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
 ; VF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
@@ -2705,7 +2705,7 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 1
 ; VF4-NEXT:    [[TMP22:%.*]] = insertelement <4 x i64> [[TMP21]], i64 [[TMP18]], i32 2
 ; VF4-NEXT:    [[TMP23:%.*]] = insertelement <4 x i64> [[TMP22]], i64 [[TMP19]], i32 3
-; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], <i64 42, i64 42, i64 42, i64 42>
+; VF4-NEXT:    [[TMP24:%.*]] = add nsw <4 x i64> [[TMP23]], splat (i64 42)
 ; VF4-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
 ; VF4-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP2]]
 ; VF4-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
@@ -2719,8 +2719,8 @@ define void @ld_div3_step3_start1_ind2(ptr noalias %A, ptr noalias %B) {
 ; VF4-NEXT:    [[TMP32:%.*]] = extractelement <4 x i64> [[TMP24]], i32 3
 ; VF4-NEXT:    store i64 [[TMP32]], ptr [[TMP28]], align 8
 ; VF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 12, i64 12, i64 12, i64 12>
-; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], <i64 4, i64 4, i64 4, i64 4>
+; VF4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 12)
+; VF4-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i64> [[VEC_IND2]], splat (i64 4)
 ; VF4-NEXT:    [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT]], 332
 ; VF4-NEXT:    br i1 [[TMP33]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
 ; VF4:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
index c9c1cd1c1817b8..e307d93b1896a8 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll
@@ -20,7 +20,7 @@ define void @vector_gep_stored(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store <4 x ptr> [[TMP0]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index b3ec3e8f0f3c63..cd43135d48f161 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -365,7 +365,7 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0
 ; CHECK-NEXT:    store <4 x i32> zeroinitializer, ptr [[TMP21]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
 ; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
@@ -440,7 +440,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    store <4 x i16> <i16 1, i16 1, i16 1, i16 1>, ptr [[TMP5]], align 2
+; CHECK-NEXT:    store <4 x i16> splat (i16 1), ptr [[TMP5]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -507,7 +507,7 @@ define void @sext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 -3
-; CHECK-NEXT:    store <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, ptr [[TMP7]], align 2
+; CHECK-NEXT:    store <4 x i16> splat (i16 -1), ptr [[TMP7]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll
index eb87800daea99c..c203a2a27c8132 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/dot-product-int-row-major.ll
@@ -152,41 +152,41 @@ define <1 x i32> @dotproduct_i32_v8_constvector(<8 x i32> %a) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i32> poison, i32 [[TMP0]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], <i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <1 x i32> [[SPLAT_SPLAT]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 1
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT1:%.*]] = insertelement <1 x i32> poison, i32 [[TMP2]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT2:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT1]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], <i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <1 x i32> [[SPLAT_SPLAT2]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = add <1 x i32> [[TMP1]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 2
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT3:%.*]] = insertelement <1 x i32> poison, i32 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT4:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT3]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], <i32 3>
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <1 x i32> [[SPLAT_SPLAT4]], splat (i32 3)
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <1 x i32> [[TMP4]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 3
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT5:%.*]] = insertelement <1 x i32> poison, i32 [[TMP8]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT6:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT5]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], <i32 4>
+; CHECK-NEXT:    [[TMP9:%.*]] = mul <1 x i32> [[SPLAT_SPLAT6]], splat (i32 4)
 ; CHECK-NEXT:    [[TMP10:%.*]] = add <1 x i32> [[TMP7]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 4
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT7:%.*]] = insertelement <1 x i32> poison, i32 [[TMP11]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT8:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT7]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], <i32 5>
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <1 x i32> [[SPLAT_SPLAT8]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <1 x i32> [[TMP10]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 5
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT9:%.*]] = insertelement <1 x i32> poison, i32 [[TMP14]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT10:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT9]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], <i32 6>
+; CHECK-NEXT:    [[TMP15:%.*]] = mul <1 x i32> [[SPLAT_SPLAT10]], splat (i32 6)
 ; CHECK-NEXT:    [[TMP16:%.*]] = add <1 x i32> [[TMP13]], [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 6
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT11:%.*]] = insertelement <1 x i32> poison, i32 [[TMP17]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT11]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], <i32 7>
+; CHECK-NEXT:    [[TMP18:%.*]] = mul <1 x i32> [[SPLAT_SPLAT12]], splat (i32 7)
 ; CHECK-NEXT:    [[TMP19:%.*]] = add <1 x i32> [[TMP16]], [[TMP18]]
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[SPLIT]], i64 7
 ; CHECK-NEXT:    [[SPLAT_SPLATINSERT13:%.*]] = insertelement <1 x i32> poison, i32 [[TMP20]], i64 0
 ; CHECK-NEXT:    [[SPLAT_SPLAT14:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT13]], <1 x i32> poison, <1 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], <i32 8>
+; CHECK-NEXT:    [[TMP21:%.*]] = mul <1 x i32> [[SPLAT_SPLAT14]], splat (i32 8)
 ; CHECK-NEXT:    [[TMP22:%.*]] = add <1 x i32> [[TMP19]], [[TMP21]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <1 x i32> [[TMP22]], <1 x i32> poison, <1 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <1 x i32> poison, <1 x i32> [[TMP23]], <1 x i32> <i32 1>
diff --git a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
index 6bcc61a3c6291a..d49745909b0263 100644
--- a/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
+++ b/llvm/test/Transforms/LowerMatrixIntrinsics/multiply-add-sub-double-row-major.ll
@@ -21,9 +21,9 @@ define void @multiply_sub_add_2x3_3x2(ptr %a.ptr, ptr %b.ptr, ptr %c.ptr) {
 ; RM-NEXT:    store <3 x double> [[TMP0]], ptr [[A_PTR]], align 8
 ; RM-NEXT:    [[VEC_GEP7:%.*]] = getelementptr double, ptr [[A_PTR]], i64 3
 ; RM-NEXT:    store <3 x double> [[TMP1]], ptr [[VEC_GEP7]], align 8
-; RM-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], <double 1.000000e+00, double 1.000000e+00>
-; RM-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], <double 1.000000e+00, double 1.000000e+00>
-; RM-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], <double 1.000000e+00, double 1.000000e+00>
+; RM-NEXT:    [[TMP2:%.*]] = fsub <2 x double> [[COL_LOAD2]], splat (double 1.000000e+00)
+; RM-NEXT:    [[TMP3:%.*]] = fsub <2 x double> [[COL_LOAD4]], splat (double 1.000000e+00)
+; RM-NEXT:    [[TMP4:%.*]] = fsub <2 x double> [[COL_LOAD6]], splat (double 1.000000e+00)
 ; RM-NEXT:    store <2 x double> [[TMP2]], ptr [[B_PTR]], align 8
 ; RM-NEXT:    [[VEC_GEP8:%.*]] = getelementptr double, ptr [[B_PTR]], i64 2
 ; RM-NEXT:    store <2 x double> [[TMP3]], ptr [[VEC_GEP8]], align 8
diff --git a/llvm/test/Transforms/MemCpyOpt/form-memset.ll b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
index 020a72183e9ea1..f8c4a6c6d11261 100644
--- a/llvm/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/llvm/test/Transforms/MemCpyOpt/form-memset.ll
@@ -325,7 +325,7 @@ define void @test8() {
 ; CHECK-LABEL: @test8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[MEMTMP:%.*]] = alloca [[STRUCT_TEST8:%.*]], align 16
-; CHECK-NEXT:    store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, ptr [[MEMTMP]], align 16
+; CHECK-NEXT:    store <4 x i32> splat (i32 -1), ptr [[MEMTMP]], align 16
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/NewGVN/completeness.ll b/llvm/test/Transforms/NewGVN/completeness.ll
index 4841e2e958b281..17592ffaf5d432 100644
--- a/llvm/test/Transforms/NewGVN/completeness.ll
+++ b/llvm/test/Transforms/NewGVN/completeness.ll
@@ -119,8 +119,8 @@ define <2 x i32> @test3vec(i1 %which) {
 ; CHECK:       delay:
 ; CHECK-NEXT:    br label [[FINAL]]
 ; CHECK:       final:
-; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi <2 x i32> [ <i32 -877, i32 -877>, [[ENTRY:%.*]] ], [ <i32 113, i32 113>, [[DELAY]] ]
-; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ <i32 1000, i32 1000>, [[ENTRY]] ], [ <i32 10, i32 10>, [[DELAY]] ]
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi <2 x i32> [ splat (i32 -877), [[ENTRY:%.*]] ], [ splat (i32 113), [[DELAY]] ]
+; CHECK-NEXT:    [[A:%.*]] = phi <2 x i32> [ splat (i32 1000), [[ENTRY]] ], [ splat (i32 10), [[DELAY]] ]
 ; CHECK-NEXT:    ret <2 x i32> [[PHIOFOPS]]
 ;
 
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
index 33bcab679ba91a..4a5ce1359d2571 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll
@@ -54,10 +54,10 @@ define void @loop(ptr %X, ptr %Y) {
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD8]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], <double 6.000000e+00, double 6.000000e+00>
-; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD8]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], splat (double 6.000000e+00)
+; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD8]], splat (double 6.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP5]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x double> splat (double 6.000000e+00), <2 x double> [[WIDE_LOAD8]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP3]], <2 x double> zeroinitializer, <2 x double> [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP4]], <2 x double> zeroinitializer, <2 x double> [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDEX]]
@@ -152,8 +152,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4, !alias.scope [[META4:![0-9]+]]
 ; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META4]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 20, i32 20, i32 20, i32 20>
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], <i32 20, i32 20, i32 20, i32 20>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 20)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD7]], splat (i32 20)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META7:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
index db0656da579f4b..e4a24f4d85e9ce 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -113,8 +113,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP4]], i64 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x i64> [[TMP9]], i64 [[TMP6]], i64 1
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], <i64 225, i64 225>
-; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], <i64 225, i64 225>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <2 x i64> [[TMP8]], splat (i64 225)
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult <2 x i64> [[TMP10]], splat (i64 225)
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP13]])
 ; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i64 1
@@ -189,8 +189,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP38:%.*]] = add nuw nsw i64 [[INDEX_1]], 18
 ; CHECK-NEXT:    [[TMP39:%.*]] = insertelement <2 x i64> poison, i64 [[TMP37]], i64 0
 ; CHECK-NEXT:    [[TMP40:%.*]] = insertelement <2 x i64> [[TMP39]], i64 [[TMP38]], i64 1
-; CHECK-NEXT:    [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], <i64 225, i64 225>
-; CHECK-NEXT:    [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], <i64 225, i64 225>
+; CHECK-NEXT:    [[TMP41:%.*]] = icmp ult <2 x i64> [[TMP36]], splat (i64 225)
+; CHECK-NEXT:    [[TMP42:%.*]] = icmp ult <2 x i64> [[TMP40]], splat (i64 225)
 ; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <2 x i1> [[TMP41]], i64 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP43]])
 ; CHECK-NEXT:    [[TMP44:%.*]] = extractelement <2 x i1> [[TMP41]], i64 1
@@ -266,8 +266,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP69:%.*]] = add nuw nsw i64 [[INDEX_2]], 33
 ; CHECK-NEXT:    [[TMP70:%.*]] = insertelement <2 x i64> poison, i64 [[TMP68]], i64 0
 ; CHECK-NEXT:    [[TMP71:%.*]] = insertelement <2 x i64> [[TMP70]], i64 [[TMP69]], i64 1
-; CHECK-NEXT:    [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], <i64 225, i64 225>
-; CHECK-NEXT:    [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], <i64 225, i64 225>
+; CHECK-NEXT:    [[TMP72:%.*]] = icmp ult <2 x i64> [[TMP67]], splat (i64 225)
+; CHECK-NEXT:    [[TMP73:%.*]] = icmp ult <2 x i64> [[TMP71]], splat (i64 225)
 ; CHECK-NEXT:    [[TMP74:%.*]] = extractelement <2 x i1> [[TMP72]], i64 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP74]])
 ; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <2 x i1> [[TMP72]], i64 1
@@ -343,8 +343,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea
 ; CHECK-NEXT:    [[TMP100:%.*]] = add nuw nsw i64 [[INDEX_3]], 48
 ; CHECK-NEXT:    [[TMP101:%.*]] = insertelement <2 x i64> poison, i64 [[TMP99]], i64 0
 ; CHECK-NEXT:    [[TMP102:%.*]] = insertelement <2 x i64> [[TMP101]], i64 [[TMP100]], i64 1
-; CHECK-NEXT:    [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], <i64 225, i64 225>
-; CHECK-NEXT:    [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], <i64 225, i64 225>
+; CHECK-NEXT:    [[TMP103:%.*]] = icmp ult <2 x i64> [[TMP98]], splat (i64 225)
+; CHECK-NEXT:    [[TMP104:%.*]] = icmp ult <2 x i64> [[TMP102]], splat (i64 225)
 ; CHECK-NEXT:    [[TMP105:%.*]] = extractelement <2 x i1> [[TMP103]], i64 0
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP105]])
 ; CHECK-NEXT:    [[TMP106:%.*]] = extractelement <2 x i1> [[TMP103]], i64 1
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
index b53d0c211919b8..eba0ed2d35fe46 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/quant_4x4.ll
@@ -43,8 +43,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub nsw <8 x i32> [[TMP7]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = mul <8 x i32> [[TMP11]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = mul <8 x i32> [[TMP12]], [[TMP10]]
-; CHECK-NEXT:    [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr <8 x i32> [[TMP13]], splat (i32 16)
+; CHECK-NEXT:    [[TMP16:%.*]] = lshr <8 x i32> [[TMP14]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP17:%.*]] = trunc nuw <8 x i32> [[TMP15]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP18:%.*]] = trunc nuw <8 x i32> [[TMP16]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP19:%.*]] = sub <8 x i16> zeroinitializer, [[TMP17]]
@@ -53,8 +53,8 @@ define i32 @quant_4x4(ptr noundef %dct, ptr noundef %mf, ptr noundef %bias) {
 ; CHECK-NEXT:    [[TMP22:%.*]] = add nuw nsw <8 x i32> [[TMP7]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = mul <8 x i32> [[TMP21]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = mul <8 x i32> [[TMP22]], [[TMP10]]
-; CHECK-NEXT:    [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP25:%.*]] = lshr <8 x i32> [[TMP23]], splat (i32 16)
+; CHECK-NEXT:    [[TMP26:%.*]] = lshr <8 x i32> [[TMP24]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP27:%.*]] = trunc nuw <8 x i32> [[TMP25]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP28:%.*]] = trunc nuw <8 x i32> [[TMP26]] to <8 x i16>
 ; CHECK-NEXT:    [[PREDPHI:%.*]] = select <8 x i1> [[TMP3]], <8 x i16> [[TMP27]], <8 x i16> [[TMP19]]
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
index 2121775224098e..8d31a7d25ff3da 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/slpordering.ll
@@ -72,7 +72,7 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
-; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -99,9 +99,9 @@ define i32 @slpordering(ptr noundef %p1, i32 noundef %ip1, ptr noundef %p2, i32
 ; CHECK-NEXT:    [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
 ; CHECK-NEXT:    [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
+; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
 ; CHECK-NEXT:    [[TMP74:%.*]] = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
diff --git a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
index 6e9abb3813aa1d..595b0f76a418be 100644
--- a/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
+++ b/llvm/test/Transforms/PhaseOrdering/ARM/arm_mult_q15.ll
@@ -39,8 +39,8 @@ define void @arm_mult_q15(ptr %pSrcA, ptr %pSrcB, ptr noalias %pDst, i32 %blockS
 ; CHECK-NEXT:    [[WIDE_LOAD17:%.*]] = load <8 x i16>, ptr [[NEXT_GEP16]], align 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = sext <8 x i16> [[WIDE_LOAD17]] to <8 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = mul nsw <8 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> <i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767, i32 32767>)
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <8 x i32> [[TMP5]], splat (i32 15)
+; CHECK-NEXT:    [[TMP7:%.*]] = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[TMP6]], <8 x i32> splat (i32 32767))
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
 ; CHECK-NEXT:    store <8 x i16> [[TMP8]], ptr [[NEXT_GEP14]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
index 33a0eb43b70856..0db94bea46c402 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
@@ -62,8 +62,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
 ; O2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; O2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; O2-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
-; O2-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; O2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], <i32 1, i32 1, i32 1, i32 1>
+; O2-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; O2-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD8]], splat (i32 1)
 ; O2-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
 ; O2-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
 ; O2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -107,8 +107,8 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8
 ; O3-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; O3-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; O3-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
-; O3-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], <i32 1, i32 1, i32 1, i32 1>
-; O3-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], <i32 1, i32 1, i32 1, i32 1>
+; O3-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], splat (i32 1)
+; O3-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD9]], splat (i32 1)
 ; O3-NEXT:    store <4 x i32> [[TMP3]], ptr [[TMP1]], align 4, !tbaa [[TBAA0]]
 ; O3-NEXT:    store <4 x i32> [[TMP4]], ptr [[TMP2]], align 4, !tbaa [[TBAA0]]
 ; O3-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
index 05e39efc7745ae..580c13d50b1f51 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll
@@ -38,10 +38,10 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD4]] to <4 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], <i32 65793, i32 65793, i32 65793, i32 65793>
-; CHECK-NEXT:    [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], <i32 65793, i32 65793, i32 65793, i32 65793>
-; CHECK-NEXT:    [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
-; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+; CHECK-NEXT:    [[TMP4:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], splat (i32 65793)
+; CHECK-NEXT:    [[TMP5:%.*]] = mul nuw nsw <4 x i32> [[TMP3]], splat (i32 65793)
+; CHECK-NEXT:    [[TMP6:%.*]] = or disjoint <4 x i32> [[TMP4]], splat (i32 -16777216)
+; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint <4 x i32> [[TMP5]], splat (i32 -16777216)
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[POUT:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
index be7906ebc0a8a2..a054d87997b1b5 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr48844-br-to-switch-vectorization.ll
@@ -58,22 +58,22 @@ define dso_local void @test(ptr %start, ptr %end) #0 {
 ; AVX2-NEXT:    [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD9:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4
 ; AVX2-NEXT:    [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4
-; AVX2-NEXT:    [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], <i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12>
-; AVX2-NEXT:    [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], <i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12>
-; AVX2-NEXT:    [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], <i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12>
-; AVX2-NEXT:    [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], <i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12, i32 -12>
-; AVX2-NEXT:    [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
-; AVX2-NEXT:    [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
-; AVX2-NEXT:    [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
-; AVX2-NEXT:    [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], <i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13, i32 13>
+; AVX2-NEXT:    [[TMP8:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 -12)
+; AVX2-NEXT:    [[TMP9:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 -12)
+; AVX2-NEXT:    [[TMP10:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 -12)
+; AVX2-NEXT:    [[TMP11:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 -12)
+; AVX2-NEXT:    [[TMP12:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD]], splat (i32 13)
+; AVX2-NEXT:    [[TMP13:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD8]], splat (i32 13)
+; AVX2-NEXT:    [[TMP14:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD9]], splat (i32 13)
+; AVX2-NEXT:    [[TMP15:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD10]], splat (i32 13)
 ; AVX2-NEXT:    [[TMP16:%.*]] = or <8 x i1> [[TMP8]], [[TMP12]]
 ; AVX2-NEXT:    [[TMP17:%.*]] = or <8 x i1> [[TMP9]], [[TMP13]]
 ; AVX2-NEXT:    [[TMP18:%.*]] = or <8 x i1> [[TMP10]], [[TMP14]]
 ; AVX2-NEXT:    [[TMP19:%.*]] = or <8 x i1> [[TMP11]], [[TMP15]]
-; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]])
-; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]])
-; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]])
-; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>, ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]])
+; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[NEXT_GEP]], i32 4, <8 x i1> [[TMP16]])
+; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP5]], i32 4, <8 x i1> [[TMP17]])
+; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP6]], i32 4, <8 x i1> [[TMP18]])
+; AVX2-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> splat (i32 42), ptr [[TMP7]], i32 4, <8 x i1> [[TMP19]])
 ; AVX2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; AVX2-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; AVX2-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
index d9101272ba3d0f..549e4e036a747b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr50555.ll
@@ -6,26 +6,26 @@ define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) {
 ; SSE-LABEL: @trunc_through_one_add(
 ; SSE-NEXT:    [[TMP3:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1
 ; SSE-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16>
-; SSE-NEXT:    [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], splat (i16 1)
 ; SSE-NEXT:    [[TMP6:%.*]] = add nuw nsw <8 x i16> [[TMP5]], [[TMP4]]
-; SSE-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; SSE-NEXT:    [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2
 ; SSE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP10:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1
 ; SSE-NEXT:    [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16>
-; SSE-NEXT:    [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], splat (i16 1)
 ; SSE-NEXT:    [[TMP13:%.*]] = add nuw nsw <8 x i16> [[TMP12]], [[TMP11]]
-; SSE-NEXT:    [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; SSE-NEXT:    [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP14]], ptr [[TMP9]], align 2
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @trunc_through_one_add(
 ; AVX-NEXT:    [[TMP3:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1
 ; AVX-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i16>
-; AVX-NEXT:    [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; AVX-NEXT:    [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], splat (i16 1)
 ; AVX-NEXT:    [[TMP6:%.*]] = add nuw nsw <16 x i16> [[TMP5]], [[TMP4]]
-; AVX-NEXT:    [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; AVX-NEXT:    [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], splat (i16 2)
 ; AVX-NEXT:    store <16 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2
 ; AVX-NEXT:    ret void
 ;
@@ -181,9 +181,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr
 ; SSE-NEXT:    [[TMP6:%.*]] = load <8 x i8>, ptr [[TMP2:%.*]], align 1
 ; SSE-NEXT:    [[TMP7:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16>
 ; SSE-NEXT:    [[TMP8:%.*]] = add nuw nsw <8 x i16> [[TMP7]], [[TMP5]]
-; SSE-NEXT:    [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], splat (i16 1)
 ; SSE-NEXT:    [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP8]]
-; SSE-NEXT:    [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; SSE-NEXT:    [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2
 ; SSE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8
@@ -193,9 +193,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr
 ; SSE-NEXT:    [[TMP17:%.*]] = load <8 x i8>, ptr [[TMP13]], align 1
 ; SSE-NEXT:    [[TMP18:%.*]] = zext <8 x i8> [[TMP17]] to <8 x i16>
 ; SSE-NEXT:    [[TMP19:%.*]] = add nuw nsw <8 x i16> [[TMP18]], [[TMP16]]
-; SSE-NEXT:    [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], splat (i16 1)
 ; SSE-NEXT:    [[TMP21:%.*]] = add nuw nsw <8 x i16> [[TMP20]], [[TMP19]]
-; SSE-NEXT:    [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; SSE-NEXT:    [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], splat (i16 2)
 ; SSE-NEXT:    store <8 x i16> [[TMP22]], ptr [[TMP14]], align 2
 ; SSE-NEXT:    ret void
 ;
@@ -205,9 +205,9 @@ define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr
 ; AVX-NEXT:    [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP2:%.*]], align 1
 ; AVX-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16>
 ; AVX-NEXT:    [[TMP8:%.*]] = add nuw nsw <16 x i16> [[TMP7]], [[TMP5]]
-; AVX-NEXT:    [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; AVX-NEXT:    [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], splat (i16 1)
 ; AVX-NEXT:    [[TMP10:%.*]] = add nuw nsw <16 x i16> [[TMP9]], [[TMP8]]
-; AVX-NEXT:    [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; AVX-NEXT:    [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], splat (i16 2)
 ; AVX-NEXT:    store <16 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2
 ; AVX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
index 1c16b37144f1eb..fb7a4a3df74ef2 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll
@@ -15,7 +15,7 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
 ; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
index da2bd6f22df9f3..719d6df77cb7d5 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle-inseltpoison.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
@@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) {
 define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
@@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) {
 define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
index 039e735985f9fc..930ce6a7b7d677 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll
@@ -10,7 +10,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
@@ -31,7 +31,7 @@ define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) {
 define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <16 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
@@ -52,7 +52,7 @@ define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) {
 define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) {
 ; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    [[BC5:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT:    ret <2 x i64> [[BC5]]
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
index 5bf7be4362a8e4..92ce8eb5b5ac23 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
@@ -41,8 +41,8 @@ define void @licm(ptr align 8 dereferenceable(8) %_M_start.i, i64 %numElem) {
 ; O23-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; O23-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[INDEX]]
 ; O23-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
-; O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]]
-; O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, ptr [[TMP2]], align 8, !tbaa [[TBAA8]]
+; O23-NEXT:    store <2 x double> splat (double 2.000000e+00), ptr [[TMP1]], align 8, !tbaa [[TBAA8:![0-9]+]]
+; O23-NEXT:    store <2 x double> splat (double 2.000000e+00), ptr [[TMP2]], align 8, !tbaa [[TBAA8]]
 ; O23-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; O23-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; O23-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
index 465b18ade1a7c3..993e949d6d2a0c 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv-nounroll.ll
@@ -19,7 +19,7 @@ define void @vdiv(ptr %a, float %b) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP0:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
index 87d3900765490f..ac4b56b0fd1b11 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll
@@ -29,10 +29,10 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
index 841096d226f756..428147a1383b11 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vec-shift.ll
@@ -34,35 +34,35 @@ define noundef i64 @foo(i64 noundef %0) {
 define void @bar(ptr noundef %0) {
 ; SSE-LABEL: @bar(
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[TMP0:%.*]], align 8
-; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], <i64 -1, i64 -1>
-; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], <i64 44, i64 44>
+; SSE-NEXT:    [[TMP3:%.*]] = xor <2 x i64> [[TMP2]], splat (i64 -1)
+; SSE-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], <i64 -1, i64 -1>
-; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 44, i64 44>
+; SSE-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP6]], splat (i64 -1)
+; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; SSE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x i64>, ptr [[TMP9]], align 8
-; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], <i64 -1, i64 -1>
-; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], <i64 44, i64 44>
+; SSE-NEXT:    [[TMP11:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1)
+; SSE-NEXT:    [[TMP12:%.*]] = shl <2 x i64> [[TMP11]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP12]], ptr [[TMP9]], align 8
 ; SSE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 48
 ; SSE-NEXT:    [[TMP14:%.*]] = load <2 x i64>, ptr [[TMP13]], align 8
-; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], <i64 -1, i64 -1>
-; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], <i64 44, i64 44>
+; SSE-NEXT:    [[TMP15:%.*]] = xor <2 x i64> [[TMP14]], splat (i64 -1)
+; SSE-NEXT:    [[TMP16:%.*]] = shl <2 x i64> [[TMP15]], splat (i64 44)
 ; SSE-NEXT:    store <2 x i64> [[TMP16]], ptr [[TMP13]], align 8
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @bar(
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 8
-; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], <i64 -1, i64 -1, i64 -1, i64 -1>
-; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], <i64 44, i64 44, i64 44, i64 44>
+; AVX-NEXT:    [[TMP3:%.*]] = xor <4 x i64> [[TMP2]], splat (i64 -1)
+; AVX-NEXT:    [[TMP4:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 44)
 ; AVX-NEXT:    store <4 x i64> [[TMP4]], ptr [[TMP0]], align 8
 ; AVX-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 32
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], <i64 -1, i64 -1, i64 -1, i64 -1>
-; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], <i64 44, i64 44, i64 44, i64 44>
+; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i64> [[TMP6]], splat (i64 -1)
+; AVX-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP7]], splat (i64 44)
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[TMP5]], align 8
 ; AVX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
index d2850f36a80dc2..035f3f145bf8d3 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-logical.ll
@@ -11,7 +11,7 @@ define float @test_merge_allof_v4sf(<4 x float> %t) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]]
@@ -168,7 +168,7 @@ define float @test_separate_allof_v4sf(<4 x float> %t) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = fcmp uge <4 x float> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ule <4 x float> [[T_FR]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]]
@@ -250,7 +250,7 @@ define float @test_separate_anyof_v4sf(<4 x float> %t) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = fcmp olt <4 x float> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp ogt <4 x float> [[T_FR]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
 ; CHECK-NEXT:    [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[DOTNOT]], [[DOTNOT6]]
@@ -332,7 +332,7 @@ define float @test_merge_allof_v4si(<4 x i32> %t) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[T_FR]], zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 256, i32 256, i32 256, i32 256>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[TMP2]], [[TMP5]]
@@ -479,7 +479,7 @@ define i32 @test_separate_allof_v4si(<4 x i32> %t) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i4 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[RETURN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 256, i32 256, i32 256, i32 256>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 256)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i4 [[TMP4]], 0
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
@@ -552,12 +552,12 @@ define i32 @test_separate_anyof_v4si(<4 x i32> %t) {
 ; CHECK-LABEL: @test_separate_anyof_v4si(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T_FR:%.*]] = freeze <4 x i32> [[T:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt <4 x i32> [[T_FR]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i1> [[TMP0]] to i4
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[DOTNOT]], label [[IF_END:%.*]], label [[RETURN:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[T_FR]], splat (i32 255)
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i1> [[TMP2]] to i4
 ; CHECK-NEXT:    [[DOTNOT6:%.*]] = icmp eq i4 [[TMP3]], 0
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[T_FR]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index c169bc000644ed..254136b0b841a9 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -280,11 +280,11 @@ define i1 @cmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[MUL]], i64 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = fdiv <2 x double> [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x double> [[TMP7]], splat (double 0x3EB0C6F7A0B5ED8D)
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP8]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    [[TMP9:%.*]] = and <2 x i1> [[TMP8]], [[SHIFT]]
 ; CHECK-NEXT:    [[OR_COND:%.*]] = extractelement <2 x i1> [[TMP9]], i64 0
-; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <2 x i1> [[TMP10]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    [[TMP11:%.*]] = or <2 x i1> [[TMP10]], [[SHIFT2]]
 ; CHECK-NEXT:    [[OR_COND1_NOT:%.*]] = extractelement <2 x i1> [[TMP11]], i64 0
diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
index 0b88a65a793e92..617d513f712681 100644
--- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
+++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll
@@ -103,8 +103,8 @@ define <2 x double> @test3_reassoc(<2 x double> %a, <2 x double> %b, <2 x double
 
 define <2 x float> @test4(<2 x float> %A) {
 ; CHECK-LABEL: @test4(
-; CHECK-NEXT:    [[X:%.*]] = fadd <2 x float> [[A:%.*]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x float> [[A]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[X:%.*]] = fadd <2 x float> [[A:%.*]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[Y:%.*]] = fadd <2 x float> [[A]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[R:%.*]] = fsub <2 x float> [[X]], [[Y]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
@@ -118,7 +118,7 @@ define <2 x float> @test4(<2 x float> %A) {
 
 define <2 x float> @test5(<2 x float> %X) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 9.400000e+01, float 9.400000e+01>
+; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 9.400000e+01)
 ; CHECK-NEXT:    ret <2 x float> [[FACTOR]]
 ;
   %Y = fmul fast <2 x float> %X, <float 4.700000e+01, float 4.700000e+01>
@@ -130,7 +130,7 @@ define <2 x float> @test5(<2 x float> %X) {
 
 define <2 x float> @test5_reassoc(<2 x float> %X) {
 ; CHECK-LABEL: @test5_reassoc(
-; CHECK-NEXT:    [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], <float 4.700000e+01, float 4.700000e+01>
+; CHECK-NEXT:    [[Y:%.*]] = fmul reassoc <2 x float> [[X:%.*]], splat (float 4.700000e+01)
 ; CHECK-NEXT:    [[Z:%.*]] = fadd reassoc <2 x float> [[Y]], [[Y]]
 ; CHECK-NEXT:    ret <2 x float> [[Z]]
 ;
@@ -143,7 +143,7 @@ define <2 x float> @test5_reassoc(<2 x float> %X) {
 
 define <2 x float> @test6(<2 x float> %X) {
 ; CHECK-LABEL: @test6(
-; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast <2 x float> [[X:%.*]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[FACTOR]]
 ;
   %Y = fadd fast <2 x float> %X ,%X
@@ -168,7 +168,7 @@ define <2 x float> @test6_reassoc(<2 x float> %X) {
 
 define <2 x double> @test7(<2 x double> %W) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], <double 1.770000e+02, double 1.770000e+02>
+; CHECK-NEXT:    [[REASS_MUL:%.*]] = fmul fast <2 x double> [[W:%.*]], splat (double 1.770000e+02)
 ; CHECK-NEXT:    ret <2 x double> [[REASS_MUL]]
 ;
   %X = fmul fast <2 x double> %W, <double 127.0, double 127.0>
@@ -181,8 +181,8 @@ define <2 x double> @test7(<2 x double> %W) {
 
 define <2 x double> @test7_reassoc(<2 x double> %W) {
 ; CHECK-LABEL: @test7_reassoc(
-; CHECK-NEXT:    [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], <double 1.270000e+02, double 1.270000e+02>
-; CHECK-NEXT:    [[Y:%.*]] = fmul reassoc <2 x double> [[W]], <double 5.000000e+01, double 5.000000e+01>
+; CHECK-NEXT:    [[X:%.*]] = fmul reassoc <2 x double> [[W:%.*]], splat (double 1.270000e+02)
+; CHECK-NEXT:    [[Y:%.*]] = fmul reassoc <2 x double> [[W]], splat (double 5.000000e+01)
 ; CHECK-NEXT:    [[Z:%.*]] = fadd reassoc <2 x double> [[Y]], [[X]]
 ; CHECK-NEXT:    ret <2 x double> [[Z]]
 ;
@@ -196,7 +196,7 @@ define <2 x double> @test7_reassoc(<2 x double> %W) {
 
 define <2 x float> @test8(<2 x float> %arg) {
 ; CHECK-LABEL: @test8(
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], <float 1.440000e+02, float 1.440000e+02>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x float> [[ARG:%.*]], splat (float 1.440000e+02)
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
   %tmp1 = fmul fast <2 x float> <float 1.200000e+01, float 1.200000e+01>, %arg
@@ -208,8 +208,8 @@ define <2 x float> @test8(<2 x float> %arg) {
 
 define <2 x float> @test8_reassoc(<2 x float> %arg) {
 ; CHECK-LABEL: @test8_reassoc(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], <float 1.200000e+01, float 1.200000e+01>
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], <float 1.200000e+01, float 1.200000e+01>
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc <2 x float> [[ARG:%.*]], splat (float 1.200000e+01)
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc <2 x float> [[TMP1]], splat (float 1.200000e+01)
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
   %tmp1 = fmul reassoc <2 x float> <float 1.200000e+01, float 1.200000e+01>, %arg
@@ -222,7 +222,7 @@ define <2 x float> @test8_reassoc(<2 x float> %arg) {
 define <2 x double> @test9(<2 x double> %b, <2 x double> %a) {
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x double> zeroinitializer, [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], <double 1.234000e+03, double 1.234000e+03>
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03)
 ; CHECK-NEXT:    ret <2 x double> [[TMP2]]
 ;
   %1 = fadd fast <2 x double> %a, <double 1.234000e+03, double 1.234000e+03>
@@ -235,7 +235,7 @@ define <2 x double> @test9(<2 x double> %b, <2 x double> %a) {
 define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) {
 ; CHECK-LABEL: @test9_unary_fneg(
 ; CHECK-NEXT:    [[TMP1:%.*]] = fneg fast <2 x double> [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], <double 1.234000e+03, double 1.234000e+03>
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], splat (double 1.234000e+03)
 ; CHECK-NEXT:    ret <2 x double> [[TMP2]]
 ;
   %1 = fadd fast <2 x double> %a, <double 1.234000e+03, double 1.234000e+03>
@@ -249,7 +249,7 @@ define <2 x double> @test9_unary_fneg(<2 x double> %b, <2 x double> %a) {
 
 define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) {
 ; CHECK-LABEL: @test9_reassoc(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], <double 1.234000e+03, double 1.234000e+03>
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03)
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc <2 x double> zeroinitializer, [[A]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]]
@@ -264,7 +264,7 @@ define <2 x double> @test9_reassoc(<2 x double> %b, <2 x double> %a) {
 
 define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a) {
 ; CHECK-LABEL: @test9_reassoc_unary_fneg(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], <double 1.234000e+03, double 1.234000e+03>
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc <2 x double> [[A:%.*]], splat (double 1.234000e+03)
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc <2 x double> [[B:%.*]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = fneg reassoc <2 x double> [[A]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc <2 x double> [[TMP3]], [[TMP2]]
@@ -281,7 +281,7 @@ define <2 x double> @test9_reassoc_unary_fneg(<2 x double> %b, <2 x double> %a)
 
 define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10(
-; CHECK-NEXT:    [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
+; CHECK-NEXT:    [[C:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01)
 ; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[C]], [[Z:%.*]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[E]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x float> [[TMP1]]
@@ -295,7 +295,7 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 
 define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10_unary_fneg(
-; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], <float 4.000000e+01, float 4.000000e+01>
+; CHECK-NEXT:    [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], splat (float 4.000000e+01)
 ; CHECK-NEXT:    [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x float> [[F]]
 ;
@@ -310,7 +310,7 @@ define <2 x float> @test10_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float
 
 define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10_reassoc(
-; CHECK-NEXT:    [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], <float 4.000000e+01, float 4.000000e+01>
+; CHECK-NEXT:    [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01)
 ; CHECK-NEXT:    [[C:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[D]]
 ; CHECK-NEXT:    [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]]
 ; CHECK-NEXT:    [[F:%.*]] = fsub reassoc <2 x float> zeroinitializer, [[E]]
@@ -325,7 +325,7 @@ define <2 x float> @test10_reassoc(<2 x float> %a, <2 x float> %b, <2 x float> %
 
 define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2 x float> %z) {
 ; CHECK-LABEL: @test10_reassoc_unary_fneg(
-; CHECK-NEXT:    [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], <float 4.000000e+01, float 4.000000e+01>
+; CHECK-NEXT:    [[D:%.*]] = fmul reassoc <2 x float> [[Z:%.*]], splat (float 4.000000e+01)
 ; CHECK-NEXT:    [[C:%.*]] = fneg reassoc <2 x float> [[D]]
 ; CHECK-NEXT:    [[E:%.*]] = fmul reassoc <2 x float> [[A:%.*]], [[C]]
 ; CHECK-NEXT:    [[F:%.*]] = fneg reassoc <2 x float> [[E]]
@@ -343,7 +343,7 @@ define <2 x float> @test10_reassoc_unary_fneg(<2 x float> %a, <2 x float> %b, <2
 define <2 x double> @test11(<2 x double> %x, <2 x double> %y) {
 ; CHECK-LABEL: @test11(
 ; CHECK-NEXT:    [[FACTOR:%.*]] = fmul fast <2 x double> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], <double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEXT:    [[REASS_MUL:%.*]] = fmul fast <2 x double> [[FACTOR]], splat (double 2.000000e+00)
 ; CHECK-NEXT:    ret <2 x double> [[REASS_MUL]]
 ;
   %1 = fmul fast <2 x double> %x, %y
@@ -372,7 +372,7 @@ define <2 x double> @test11_reassoc(<2 x double> %x, <2 x double> %y) {
 define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:    [[MUL:%.*]] = mul <2 x i64> [[C:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[MUL]], <i64 5, i64 5>
+; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i64> [[MUL]], splat (i64 5)
 ; CHECK-NEXT:    ret <2 x i64> [[SHL]]
 ;
   %mul = mul <2 x i64> %c, %b
@@ -384,7 +384,7 @@ define <2 x i64> @test12(<2 x i64> %b, <2 x i64> %c) {
 
 define <4 x float> @test13(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @test13(
-; CHECK-NEXT:    [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast <4 x float> [[B:%.*]], splat (float 5.000000e+00)
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <4 x float> [[A:%.*]], [[MUL]]
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
diff --git a/llvm/test/Transforms/Reassociate/negation.ll b/llvm/test/Transforms/Reassociate/negation.ll
index 14ae86fb94aaba..81e888a9476888 100644
--- a/llvm/test/Transforms/Reassociate/negation.ll
+++ b/llvm/test/Transforms/Reassociate/negation.ll
@@ -33,7 +33,7 @@ define i32 @test2(i32 %a, i32 %b, i32 %z) {
 
 define <2 x i32> @negate_vec_poisons(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z) {
 ; CHECK-LABEL: @negate_vec_poisons(
-; CHECK-NEXT:    [[E:%.*]] = mul <2 x i32> [[A:%.*]], <i32 40, i32 40>
+; CHECK-NEXT:    [[E:%.*]] = mul <2 x i32> [[A:%.*]], splat (i32 40)
 ; CHECK-NEXT:    [[F:%.*]] = mul <2 x i32> [[E]], [[Z:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[F]]
 ;
@@ -51,7 +51,7 @@ define <2 x i32> @PR57683(<2 x i32> %x) {
 ; CHECK-NEXT:    [[PARTIAL_NEG:%.*]] = sub <2 x i32> <i32 poison, i32 0>, [[X:%.*]]
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[PARTIAL_NEG]], <2 x i32> [[X]], <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[X_NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X]]
-; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[X_NEG]], <i32 1, i32 1>
+; CHECK-NEXT:    [[SUB:%.*]] = add <2 x i32> [[X_NEG]], splat (i32 1)
 ; CHECK-NEXT:    [[R:%.*]] = add <2 x i32> [[SUB]], [[SHUF]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
@@ -67,7 +67,7 @@ define <2 x float> @PR57683_FP(<2 x float> %x) {
 ; CHECK-NEXT:    [[PARTIAL_NEG:%.*]] = fsub reassoc nsz <2 x float> <float poison, float 0.000000e+00>, [[X:%.*]]
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x float> [[PARTIAL_NEG]], <2 x float> [[X]], <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[X_NEG:%.*]] = fneg reassoc nsz <2 x float> [[X]]
-; CHECK-NEXT:    [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[SUB:%.*]] = fadd reassoc nsz <2 x float> [[X_NEG]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[R:%.*]] = fadd reassoc nsz <2 x float> [[SUB]], [[SHUF]]
 ; CHECK-NEXT:    ret <2 x float> [[R]]
 ;
diff --git a/llvm/test/Transforms/Reassociate/xor_reassoc.ll b/llvm/test/Transforms/Reassociate/xor_reassoc.ll
index 97efb8f4e59415..729535a6fa0966 100644
--- a/llvm/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/llvm/test/Transforms/Reassociate/xor_reassoc.ll
@@ -25,8 +25,8 @@ define i32 @xor1(i32 %x) {
 ;
 define <2 x i32> @xor1_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @xor1_vec(
-; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], <i32 435, i32 435>
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], <i32 435, i32 435>
+; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435)
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[AND_RA]], splat (i32 435)
 ; CHECK-NEXT:    ret <2 x i32> [[XOR]]
 ;
   %or = or <2 x i32> %x, <i32 123, i32 123>
@@ -54,7 +54,7 @@ define i32 @xor2(i32 %x, i32 %y) {
 ; Real testing case : (x & 123) ^ y ^ (x & 345) => (x & 435) ^ y
 define <2 x i32> @xor2_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @xor2_vec(
-; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], <i32 435, i32 435>
+; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 435)
 ; CHECK-NEXT:    [[XOR2:%.*]] = xor <2 x i32> [[AND_RA]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[XOR2]]
 ;
@@ -85,8 +85,8 @@ define i32 @xor3(i32 %x, i32 %y) {
 ;  c3 = ~c1 ^ c2
 define <2 x i32> @xor3_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @xor3_vec(
-; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], <i32 -436, i32 -436>
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], <i32 123, i32 123>
+; CHECK-NEXT:    [[AND_RA:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -436)
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123)
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND_RA]]
 ; CHECK-NEXT:    ret <2 x i32> [[XOR1]]
 ;
@@ -114,8 +114,8 @@ define i32 @xor4(i32 %x, i32 %y) {
 ; Test rule: (x | c1) ^ c2 = (x & ~c1) ^ (c1 ^ c2)
 define <2 x i32> @xor4_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @xor4_vec(
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], <i32 -124, i32 -124>
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], <i32 435, i32 435>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X:%.*]], splat (i32 -124)
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 435)
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[AND]]
 ; CHECK-NEXT:    ret <2 x i32> [[XOR1]]
 ;
@@ -149,7 +149,7 @@ define i32 @xor_special1(i32 %x, i32 %y) {
 ;  (x | c1) ^ (x & ~c1) = c1
 define <2 x i32> @xor_special1_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @xor_special1_vec(
-; CHECK-NEXT:    [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], <i32 123, i32 123>
+; CHECK-NEXT:    [[XOR1:%.*]] = xor <2 x i32> [[Y:%.*]], splat (i32 123)
 ; CHECK-NEXT:    ret <2 x i32> [[XOR1]]
 ;
   %or = or <2 x i32> %x, <i32 123, i32 123>
@@ -178,7 +178,7 @@ define i32 @xor_special2(i32 %x, i32 %y) {
 ;  (x | c1) ^ (x & c1) = x ^ c1
 define <2 x i32> @xor_special2_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @xor_special2_vec(
-; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], <i32 123, i32 123>
+; CHECK-NEXT:    [[XOR:%.*]] = xor <2 x i32> [[X:%.*]], splat (i32 123)
 ; CHECK-NEXT:    [[XOR1:%.*]] = xor <2 x i32> [[XOR]], [[Y:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[XOR1]]
 ;
diff --git a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
index 1f2fbb6f53cdd8..7cae2d93e03152 100644
--- a/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/add-nuw-nsw-flags.ll
@@ -55,8 +55,8 @@ entry:
 define <4 x i8> @range_from_lshr_vec_2(<4 x i8> %a) {
 ; CHECK-LABEL: @range_from_lshr_vec_2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1>
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[A_SHR:%.*]] = lshr <4 x i8> [[A:%.*]], splat (i8 1)
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw <4 x i8> [[A_SHR]], splat (i8 2)
 ; CHECK-NEXT:    ret <4 x i8> [[ADD_1]]
 ;
 entry:
@@ -196,7 +196,7 @@ entry:
 define internal <4 x i8> @test_propagate_argument(<4 x i8> %a, <4 x i8> %b) {
 ; CHECK-LABEL: @test_propagate_argument(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3>
+; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i8> [[A:%.*]], splat (i8 3)
 ; CHECK-NEXT:    ret <4 x i8> [[ADD]]
 ;
 entry:
@@ -206,7 +206,7 @@ entry:
 
 define <4 x i8> @test_propagate_caller(<4 x i8> %a) {
 ; CHECK-LABEL: @test_propagate_caller(
-; CHECK-NEXT:    [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> <i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:    [[RES_1:%.*]] = call <4 x i8> @test_propagate_argument(<4 x i8> [[A:%.*]], <4 x i8> splat (i8 3))
 ; CHECK-NEXT:    ret <4 x i8> [[RES_1]]
 ;
   %add = add <4 x i8> <i8 1, i8 1, i8 1, i8 1>, <i8 2, i8 2, i8 2, i8 2>
diff --git a/llvm/test/Transforms/SCCP/intrinsics.ll b/llvm/test/Transforms/SCCP/intrinsics.ll
index 5edb31738e685b..27ada6339ea06c 100644
--- a/llvm/test/Transforms/SCCP/intrinsics.ll
+++ b/llvm/test/Transforms/SCCP/intrinsics.ll
@@ -127,7 +127,7 @@ define <4 x i32> @pr63380(<4 x i32> %input) {
 ; CHECK-LABEL: @pr63380(
 ; CHECK-NEXT:    [[CTLZ_1:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[INPUT:%.*]], i1 false)
 ; CHECK-NEXT:    [[CTLZ_2:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[CTLZ_1]], i1 true)
-; CHECK-NEXT:    ret <4 x i32> <i32 27, i32 27, i32 27, i32 27>
+; CHECK-NEXT:    ret <4 x i32> splat (i32 27)
 ;
   %ctlz.1 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %input, i1 false)
   %ctlz.2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %ctlz.1, i1 true)
diff --git a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
index d9dc0459b5ced6..8c15b0820de255 100644
--- a/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
+++ b/llvm/test/Transforms/SCCP/ip-ranges-casts.ll
@@ -310,7 +310,7 @@ define i16 @vector_binop_and_cast() {
 ; CHECK-LABEL: define i16 @vector_binop_and_cast() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VECINIT7:%.*]] = insertelement <8 x i16> <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i32 0
-; CHECK-NEXT:    [[REM:%.*]] = srem <8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>, [[VECINIT7]]
+; CHECK-NEXT:    [[REM:%.*]] = srem <8 x i16> splat (i16 2), [[VECINIT7]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[REM]] to i128
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i128 [[TMP0]] to i16
 ; CHECK-NEXT:    ret i16 [[TMP1]]
diff --git a/llvm/test/Transforms/SCCP/overdefined-ext.ll b/llvm/test/Transforms/SCCP/overdefined-ext.ll
index 05819c32d522db..e08acd20cc2ace 100644
--- a/llvm/test/Transforms/SCCP/overdefined-ext.ll
+++ b/llvm/test/Transforms/SCCP/overdefined-ext.ll
@@ -26,7 +26,7 @@ define i1 @zext_icmp(i1 %t0) {
 define <2 x i1> @zext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[T2]]
 ;
   %t1 = zext <2 x i1> %t0 to <2 x i32>
@@ -37,7 +37,7 @@ define <2 x i1> @zext_vector(<2 x i1> %t0) {
 define <2 x i1> @zext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @zext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = zext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <2 x i32> [[T1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %t1 = zext <2 x i1> %t0 to <2 x i32>
@@ -74,7 +74,7 @@ define i1 @sext_icmp(i1 %t0) {
 define <2 x i1> @sext_vector(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector(
 ; CHECK-NEXT:    [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[T2:%.*]] = icmp eq <2 x i32> [[T1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> [[T2]]
 ;
   %t1 = sext <2 x i1> %t0 to <2 x i32>
@@ -85,7 +85,7 @@ define <2 x i1> @sext_vector(<2 x i1> %t0) {
 define <2 x i1> @sext_vector2(<2 x i1> %t0) {
 ; CHECK-LABEL: @sext_vector2(
 ; CHECK-NEXT:    [[T1:%.*]] = sext <2 x i1> [[T0:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[T2:%.*]] = add nsw <2 x i32> [[T1]], <i32 2, i32 2>
+; CHECK-NEXT:    [[T2:%.*]] = add nsw <2 x i32> [[T1]], splat (i32 2)
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %t1 = sext <2 x i1> %t0 to <2 x i32>
diff --git a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
index 92d84f71bd9d46..f968ef6509748b 100644
--- a/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
+++ b/llvm/test/Transforms/SCCP/trunc-nuw-nsw-flags.ll
@@ -47,7 +47,7 @@ define <4 x i16> @range_from_and_nuw_vec(<4 x i32> %a) {
 ; CHECK-LABEL: define <4 x i16> @range_from_and_nuw_vec(
 ; CHECK-SAME: <4 x i32> [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A]], splat (i32 65535)
 ; CHECK-NEXT:    [[TRUNC1:%.*]] = trunc nuw <4 x i32> [[AND1]] to <4 x i16>
 ; CHECK-NEXT:    ret <4 x i16> [[TRUNC1]]
 ;
diff --git a/llvm/test/Transforms/SCCP/vector-bitcast.ll b/llvm/test/Transforms/SCCP/vector-bitcast.ll
index bed5c02d73bc51..725e3bc690ebae 100644
--- a/llvm/test/Transforms/SCCP/vector-bitcast.ll
+++ b/llvm/test/Transforms/SCCP/vector-bitcast.ll
@@ -12,8 +12,8 @@ define void @foo(ptr %p) nounwind {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[WHILE_BODY_I:%.*]]
 ; CHECK:       while.body.i:
-; CHECK-NEXT:    [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ <i32 939524096, i32 939524096, i32 939524096, i32 939524096>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], <i32 -8388608, i32 -8388608, i32 -8388608, i32 -8388608>
+; CHECK-NEXT:    [[VWORKEXPONENT_I_033:%.*]] = phi <4 x i32> [ [[SUB_I_I:%.*]], [[WHILE_BODY_I]] ], [ splat (i32 939524096), [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SUB_I_I]] = add <4 x i32> [[VWORKEXPONENT_I_033]], splat (i32 -8388608)
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[SUB_I_I]] to <2 x i64>
 ; CHECK-NEXT:    store volatile <2 x i64> zeroinitializer, ptr [[P:%.*]], align 16
 ; CHECK-NEXT:    br label [[WHILE_BODY_I]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll
index 0c6a976b51a1d7..2b5ee59aeb1638 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/external-use-icmp.ll
@@ -12,8 +12,8 @@ define i16 @foo(i16 %in1, i16 %in2) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <2 x i16> [[TMP3]] to <2 x i64>
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul nuw nsw <2 x i64> [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = and <2 x i64> [[TMP9]], <i64 65535, i64 65535>
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP6]], <i64 65533, i64 65533>
+; CHECK-NEXT:    [[TMP12:%.*]] = and <2 x i64> [[TMP9]], splat (i64 65535)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[TMP12]], splat (i64 65533)
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
 ; CHECK-NEXT:    [[ZEXT3_1:%.*]] = zext i1 [[TMP8]] to i16
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
index e60e356e5cd819..f9953df6c1735d 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
@@ -33,8 +33,8 @@ define void @dist_vec(ptr nocapture noundef readonly %pA, ptr nocapture noundef
 ; CHECK-NEXT:    [[ADD_PTR]] = getelementptr inbounds i32, ptr [[PA_ADDR_0271]], i64 4
 ; CHECK-NEXT:    [[ADD_PTR8]] = getelementptr inbounds i32, ptr [[PB_ADDR_0270]], i64 4
 ; CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[TMP5]], [[TMP4]]
-; CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
-; CHECK-NEXT:    [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
+; CHECK-NEXT:    [[NOT_I242:%.*]] = xor <4 x i32> [[TMP5]], splat (i32 -1)
 ; CHECK-NEXT:    [[AND_I243:%.*]] = and <4 x i32> [[NOT_I242]], [[NOT_I]]
 ; CHECK-NEXT:    [[AND_I245:%.*]] = and <4 x i32> [[TMP4]], [[NOT_I242]]
 ; CHECK-NEXT:    [[AND_I247:%.*]] = and <4 x i32> [[TMP5]], [[NOT_I]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
index 45030a0965e006..ec9c01c99256b0 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll
@@ -20,9 +20,9 @@ define internal i32 @gather_multiple_use(i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[D:%.*]], i64 3
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], splat (i32 15)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP5]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
index 8987f34e561e6f..7ae336e2ccee98 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/gather-root.ll
@@ -16,7 +16,7 @@ define void @PR28330(i32 %n) {
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
 ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; DEFAULT-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; DEFAULT-NEXT:    br label [[FOR_BODY]]
@@ -28,7 +28,7 @@ define void @PR28330(i32 %n) {
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
 ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; GATHER-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; GATHER-NEXT:    br label [[FOR_BODY]]
@@ -40,7 +40,7 @@ define void @PR28330(i32 %n) {
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
 ; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; MAX-COST-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; MAX-COST-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[P17]]
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
@@ -93,7 +93,7 @@ define void @PR32038(i32 %n) {
 ; DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
 ; DEFAULT:       for.body:
 ; DEFAULT-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; DEFAULT-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; DEFAULT-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; DEFAULT-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; DEFAULT-NEXT:    br label [[FOR_BODY]]
@@ -105,7 +105,7 @@ define void @PR32038(i32 %n) {
 ; GATHER-NEXT:    br label [[FOR_BODY:%.*]]
 ; GATHER:       for.body:
 ; GATHER-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; GATHER-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; GATHER-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; GATHER-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; GATHER-NEXT:    br label [[FOR_BODY]]
@@ -117,7 +117,7 @@ define void @PR32038(i32 %n) {
 ; MAX-COST-NEXT:    br label [[FOR_BODY:%.*]]
 ; MAX-COST:       for.body:
 ; MAX-COST-NEXT:    [[P17:%.*]] = phi i32 [ [[OP_RDX:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
-; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> <i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720, i32 -720>, <8 x i32> <i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80, i32 -80>
+; MAX-COST-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> splat (i32 -720), <8 x i32> splat (i32 -80)
 ; MAX-COST-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; MAX-COST-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], -5
 ; MAX-COST-NEXT:    br label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
index 90a30808aeeba9..a5371cd051a5b9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadi8.ll
@@ -20,7 +20,7 @@ define void @f_noalias(ptr noalias nocapture %dst, ptr noalias nocapture readonl
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], <i32 256, i32 256, i32 256, i32 256>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult <4 x i32> [[TMP9]], splat (i32 256)
 ; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt <4 x i32> [[TMP9]], zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = sext <4 x i1> [[TMP11]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP9]], <4 x i32> [[TMP12]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
index 5f0b16048d40c8..7622f9bc5c41d9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll
@@ -1257,7 +1257,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
-; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
 ; CHECK-NEXT:    [[TMP45:%.*]] = add nsw <16 x i32> [[TMP43]], [[TMP44]]
@@ -1275,9 +1275,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP57:%.*]] = add nsw <16 x i32> [[TMP55]], [[TMP56]]
 ; CHECK-NEXT:    [[TMP58:%.*]] = sub nsw <16 x i32> [[TMP55]], [[TMP56]]
 ; CHECK-NEXT:    [[TMP59:%.*]] = shufflevector <16 x i32> [[TMP57]], <16 x i32> [[TMP58]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP61:%.*]] = and <16 x i32> [[TMP60]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP60:%.*]] = lshr <16 x i32> [[TMP59]], splat (i32 15)
+; CHECK-NEXT:    [[TMP61:%.*]] = and <16 x i32> [[TMP60]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP62:%.*]] = mul nuw <16 x i32> [[TMP61]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP63:%.*]] = add <16 x i32> [[TMP62]], [[TMP59]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = xor <16 x i32> [[TMP63]], [[TMP62]]
 ; CHECK-NEXT:    [[TMP65:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP64]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
index 8f6d5d8f2d7ec8..9f5744b17cb79e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
@@ -325,7 +325,7 @@ define void @no_version(ptr nocapture %dst, ptr nocapture readonly %src) {
 ; CHECK-LABEL: @no_version(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i32> [[TMP0]], splat (i32 16)
 ; CHECK-NEXT:    store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -1242,13 +1242,13 @@ define void @crash_no_tracked_instructions(ptr %arg, ptr %arg.2, ptr %arg.3, i1
 ; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x float> [[TMP2]], <float 9.900000e+01, float 1.000000e+01>
 ; CHECK-NEXT:    store float [[T26]], ptr [[T25]], align 4
 ; CHECK-NEXT:    [[T27:%.*]] = load float, ptr [[ARG_2:%.*]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], <float 2.000000e+01, float 2.000000e+01>
+; CHECK-NEXT:    [[TMP4:%.*]] = fadd <2 x float> [[TMP3]], splat (float 2.000000e+01)
 ; CHECK-NEXT:    br label [[BB30]]
 ; CHECK:       bb30:
 ; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x float> [ [[TMP4]], [[BB22]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    br label [[BB36:%.*]]
 ; CHECK:       bb36:
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], <float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x float> [[TMP5]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    store <2 x float> [[TMP6]], ptr [[ARG_3]], align 4
 ; CHECK-NEXT:    br label [[BB41:%.*]]
 ; CHECK:       bb41:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
index 0c3c695a308cd4..476068dcd04969 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/sdiv-pow2.ll
@@ -9,7 +9,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b,
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2)
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
index 1bd63b79b0f5ca..f79db7d7ad0cbd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll
@@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15)
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
index f99f6ecd333823..1330e5557e559b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll
@@ -200,9 +200,9 @@ define i32 @reduction_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 7, i32 2>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 4, i32 6, i32 3>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], <i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], splat (i32 15)
+; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i32> [[TMP6]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP8:%.*]] = mul nuw <4 x i32> [[TMP7]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i32> [[TMP9]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
index 0eda2cbc862ff0..9910090d43eae9 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/trunc-insertion.ll
@@ -13,7 +13,7 @@ define dso_local void @l() local_unnamed_addr {
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[I4:%.*]] = zext i1 undef to i32
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i16> [[TMP0]], undef
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], <i16 8, i16 8>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i16> [[TMP1]], splat (i16 8)
 ; CHECK-NEXT:    br label [[BB25]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    [[I12:%.*]] = zext i1 undef to i32
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll
index 7b27489782fc46..6e596fccd18f39 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll
@@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10)
 ; NON-POW2-NEXT:    store <15 x i8> [[TMP1]], ptr [[DST]], align 1
 ; NON-POW2-NEXT:    ret void
 ;
@@ -17,12 +17,12 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
 ; POW2-ONLY-NEXT:  entry:
 ; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10)
 ; POW2-ONLY-NEXT:    store <8 x i8> [[TMP1]], ptr [[DST]], align 1
 ; POW2-ONLY-NEXT:    [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
 ; POW2-ONLY-NEXT:    [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
-; POW2-ONLY-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10)
 ; POW2-ONLY-NEXT:    store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
 ; POW2-ONLY-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
 ; POW2-ONLY-NEXT:    [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
index c18811a35c1eeb..feb4ad865f3147 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
@@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10)
 ; NON-POW2-NEXT:    store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
 ; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 10, i32 10>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10)
 ; POW2-ONLY-NEXT:    store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
@@ -160,7 +160,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
 ; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
-; NON-POW2-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], <i32 9, i32 9, i32 9>
+; NON-POW2-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9)
 ; NON-POW2-NEXT:    store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -177,7 +177,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
 ; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
-; POW2-ONLY-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 9, i32 9>
+; POW2-ONLY-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9)
 ; POW2-ONLY-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store i32 [[ADD_2]], ptr [[DST_2]], align 4
@@ -221,7 +221,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
 ; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -232,7 +232,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
 ; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll
index c9b2e0ffc15f3d..2d94babb568742 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll
@@ -568,8 +568,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) {
 ; NON-POW2-SAME: ptr [[A:%.*]], <3 x float> [[IN:%.*]]) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = fsub <3 x float> [[IN]], [[IN]]
-; NON-POW2-NEXT:    [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, <3 x float> <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>)
-; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; NON-POW2-NEXT:    [[TMP2:%.*]] = call <3 x float> @llvm.fmuladd.v3f32(<3 x float> [[TMP1]], <3 x float> splat (float 2.000000e+00), <3 x float> splat (float 3.000000e+00))
+; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul <3 x float> [[TMP2]], splat (float 3.000000e+00)
 ; NON-POW2-NEXT:    [[TMP4:%.*]] = shufflevector <3 x float> [[TMP3]], <3 x float> poison, <3 x i32> <i32 1, i32 2, i32 0>
 ; NON-POW2-NEXT:    store <3 x float> [[TMP4]], ptr [[A]], align 4
 ; NON-POW2-NEXT:    ret void
@@ -584,8 +584,8 @@ define void @can_reorder_vec3_op_with_padding(ptr %A, <3 x float> %in) {
 ; POW2-ONLY-NEXT:    [[MUL6_I_I_I_I:%.*]] = fmul float [[TMP1]], 3.000000e+00
 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[IN]], <3 x float> poison, <2 x i32> <i32 1, i32 2>
 ; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fsub <2 x float> [[TMP2]], [[TMP2]]
-; POW2-ONLY-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> <float 3.000000e+00, float 3.000000e+00>)
-; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], <float 3.000000e+00, float 3.000000e+00>
+; POW2-ONLY-NEXT:    [[TMP4:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> splat (float 2.000000e+00), <2 x float> splat (float 3.000000e+00))
+; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fmul <2 x float> [[TMP4]], splat (float 3.000000e+00)
 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[A]], align 4
 ; POW2-ONLY-NEXT:    store float [[MUL6_I_I_I_I]], ptr [[ARRAYIDX42_I]], align 4
 ; POW2-ONLY-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
index ddeaff9327e3d2..77371c2ac7b1cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
@@ -9,8 +9,8 @@ define void @select_umin_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umin_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -68,8 +68,8 @@ define void @select_umin_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umin_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -104,10 +104,10 @@ define void @select_ule_ugt_mix_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -144,8 +144,8 @@ define void @select_umin_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umin_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -169,8 +169,8 @@ define void @select_umin_ule_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umin_ule_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -228,8 +228,8 @@ define void @select_umin_ule_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umin_ule_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -266,8 +266,8 @@ define void @select_umin_ule_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umin_ule_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -290,8 +290,8 @@ define void @select_smin_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smin_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -349,8 +349,8 @@ define void @select_smin_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smin_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -387,8 +387,8 @@ define void @select_smin_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smin_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -411,8 +411,8 @@ define void @select_smin_sle_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smin_sle_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -470,8 +470,8 @@ define void @select_smin_sle_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smin_sle_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -508,8 +508,8 @@ define void @select_smin_sle_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smin_sle_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sle <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -531,8 +531,8 @@ define void @select_umax_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umax_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -590,8 +590,8 @@ define void @select_umax_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umax_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -628,8 +628,8 @@ define void @select_umax_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umax_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -652,8 +652,8 @@ define void @select_umax_uge_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_umax_uge_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -711,8 +711,8 @@ define void @select_umax_uge_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_umax_uge_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -749,8 +749,8 @@ define void @select_umax_uge_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_umax_uge_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -773,8 +773,8 @@ define void @select_smax_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smax_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -832,8 +832,8 @@ define void @select_smax_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smax_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -870,8 +870,8 @@ define void @select_smax_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smax_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -895,8 +895,8 @@ define void @select_smax_sge_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_smax_sge_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], splat (i16 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> splat (i16 16383)
 ; CHECK-NEXT:    store <8 x i16> [[TMP3]], ptr [[PTR]], align 2
 ; CHECK-NEXT:    ret void
 ;
@@ -954,8 +954,8 @@ define void @select_smax_sge_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_smax_sge_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], splat (i32 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> splat (i32 16383)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -992,8 +992,8 @@ define void @select_smax_sge_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_smax_sge_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], <i64 16383, i64 16383>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sge <2 x i64> [[TMP1]], splat (i64 16383)
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[TMP1]], <2 x i64> splat (i64 16383)
 ; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
index 0fe4e6a5aa28b5..f9e415a3cefc13 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-uniform-cmps.ll
@@ -107,7 +107,7 @@ define void @select_uniform_ugt_7xi8(ptr %ptr, i8 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_7xi8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], <i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i8> [[TMP0]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP0]], <4 x i8> [[TMP3]]
@@ -181,7 +181,7 @@ define void @select_uniform_ugt_8xi8(ptr %ptr, i8 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_8xi8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <8 x i8> [[TMP0]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i8> [[TMP0]], <8 x i8> [[TMP3]]
@@ -258,7 +258,7 @@ define void @select_uniform_ugt_16xi8(ptr %ptr, i8 %x) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 [[L_11]], i32 11
 ; CHECK-NEXT:    [[TMP7:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP6]], <8 x i8> [[TMP0]], i64 0)
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP7]], <4 x i8> [[TMP3]], i64 12)
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp ugt <16 x i8> [[TMP8]], splat (i8 -1)
 ; CHECK-NEXT:    [[TMP10:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v8i8(<16 x i8> [[TMP8]], <8 x i8> [[TMP0]], i64 0)
 ; CHECK-NEXT:    [[TMP11:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP10]], <4 x i8> [[TMP3]], i64 12)
 ; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <16 x i8> poison, i8 [[X]], i32 0
@@ -371,7 +371,7 @@ define void @select_uniform_ugt_4xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_uniform_ugt_4xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], <i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i16> [[TMP0]], splat (i16 16383)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> [[TMP0]], <4 x i16> [[TMP3]]
@@ -409,7 +409,7 @@ define void @select_uniform_ult_8xi16(ptr %ptr, i16 %x) {
 ; CHECK-LABEL: @select_uniform_ult_8xi16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <8 x i16> [[TMP0]], splat (i16 16383)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> poison, i16 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP0]], <8 x i16> [[TMP3]]
@@ -470,7 +470,7 @@ define void @select_uniform_eq_2xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_uniform_eq_2xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], <i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i32> [[TMP0]], splat (i32 16383)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]]
@@ -496,7 +496,7 @@ define void @select_uniform_eq_4xi32(ptr %ptr, i32 %x) {
 ; CHECK-LABEL: @select_uniform_eq_4xi32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], <i32 16383, i32 16383, i32 16383, i32 16383>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[TMP0]], splat (i32 16383)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP0]], <4 x i32> [[TMP3]]
@@ -533,7 +533,7 @@ define void @select_uniform_ne_2xi64(ptr %ptr, i64 %x) {
 ; CHECK-LABEL: @select_uniform_ne_2xi64(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], <i64 16383, i64 16383>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i64> [[TMP0]], splat (i64 16383)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP0]], <2 x i64> [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
index a122d84629c1a2..b8bf38af3668dd 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/widen.ll
@@ -14,10 +14,10 @@ define void @PR50256(ptr %a, ptr %b, i32 %n) {
 ; CHECK-NEXT:    [[ARRAYIDX3_8:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[A]], align 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i8> [[TMP2]] to <8 x i16>
-; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP4:%.*]] = shl nuw <8 x i16> [[TMP3]], splat (i16 8)
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <8 x i8>, ptr [[ARRAYIDX_8]], align 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext <8 x i8> [[TMP7]] to <8 x i16>
-; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT:    [[TMP9:%.*]] = shl nuw <8 x i16> [[TMP8]], splat (i16 8)
 ; CHECK-NEXT:    store <8 x i16> [[TMP4]], ptr [[B]], align 2
 ; CHECK-NEXT:    store <8 x i16> [[TMP9]], ptr [[ARRAYIDX3_8]], align 2
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
index 5132e491811691..13773bf901b9bf 100644
--- a/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
+++ b/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
@@ -11,8 +11,8 @@ define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %a
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP6]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x half>, ptr [[TMP11]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], <half 0xH5380, half 0xH5380>
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], <half 0xH57F0, half 0xH57F0>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast <2 x half> [[TMP1]], splat (half 0xH5380)
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <2 x half> [[TMP2]], splat (half 0xH57F0)
 ; CHECK-NEXT:    store <2 x half> [[TMP3]], ptr [[TMP16]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
index 8c6b92b65ae050..caba742897d99f 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/complex-loads.ll
@@ -46,7 +46,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP50:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP24:%.*]] = sub <2 x i32> [[TMP26]], [[TMP50]]
-; CHECK-NEXT:    [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP25:%.*]] = shl <2 x i32> [[TMP24]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP30:%.*]] = add <2 x i32> [[TMP25]], [[TMP23]]
 ; CHECK-NEXT:    [[TMP32:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP51:%.*]] = zext <2 x i8> [[TMP32]] to <2 x i32>
@@ -59,7 +59,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP40:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP61:%.*]] = zext <2 x i8> [[TMP40]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP36:%.*]] = sub <2 x i32> [[TMP39]], [[TMP61]]
-; CHECK-NEXT:    [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP37:%.*]] = shl <2 x i32> [[TMP36]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP42:%.*]] = add <2 x i32> [[TMP37]], [[TMP35]]
 ; CHECK-NEXT:    [[TMP43:%.*]] = add <2 x i32> [[TMP42]], [[TMP30]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = sub <2 x i32> [[TMP30]], [[TMP42]]
@@ -83,7 +83,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP63:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1
 ; CHECK-NEXT:    [[TMP76:%.*]] = zext <2 x i8> [[TMP63]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP45:%.*]] = sub <2 x i32> [[TMP60]], [[TMP76]]
-; CHECK-NEXT:    [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP46:%.*]] = shl <2 x i32> [[TMP45]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP90:%.*]] = add <2 x i32> [[TMP46]], [[TMP59]]
 ; CHECK-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2
 ; CHECK-NEXT:    [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2
@@ -98,7 +98,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP100:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1
 ; CHECK-NEXT:    [[TMP103:%.*]] = zext <2 x i8> [[TMP100]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP69:%.*]] = sub <2 x i32> [[TMP98]], [[TMP103]]
-; CHECK-NEXT:    [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP74:%.*]] = add <2 x i32> [[TMP70]], [[TMP65]]
 ; CHECK-NEXT:    [[TMP78:%.*]] = extractelement <2 x i32> [[TMP90]], i32 0
 ; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <2 x i32> [[TMP90]], i32 1
@@ -160,7 +160,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP109:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP89:%.*]] = zext <2 x i8> [[TMP109]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP87:%.*]] = sub <2 x i32> [[TMP108]], [[TMP89]]
-; CHECK-NEXT:    [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP88:%.*]] = shl <2 x i32> [[TMP87]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP112:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP120:%.*]] = zext <2 x i8> [[TMP112]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP94:%.*]] = shufflevector <4 x i8> [[TMP149]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
@@ -168,7 +168,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP131:%.*]] = shufflevector <4 x i8> [[TMP150]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP132:%.*]] = zext <2 x i8> [[TMP131]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP95:%.*]] = sub <2 x i32> [[TMP128]], [[TMP132]]
-; CHECK-NEXT:    [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP96:%.*]] = shl <2 x i32> [[TMP95]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP97:%.*]] = insertelement <2 x i32> [[TMP102]], i32 [[CONV33]], i32 1
 ; CHECK-NEXT:    [[TMP117:%.*]] = sub <2 x i32> [[TMP97]], [[TMP120]]
 ; CHECK-NEXT:    [[TMP105:%.*]] = add <2 x i32> [[TMP96]], [[TMP117]]
@@ -204,7 +204,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP118:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; CHECK-NEXT:    [[TMP134:%.*]] = zext <2 x i8> [[TMP118]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP124:%.*]] = sub <2 x i32> [[TMP115]], [[TMP134]]
-; CHECK-NEXT:    [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP125:%.*]] = shl <2 x i32> [[TMP124]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP127:%.*]] = shufflevector <4 x i8> [[TMP158]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP191:%.*]] = zext <2 x i8> [[TMP127]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP160:%.*]] = shufflevector <4 x i8> [[TMP121]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
@@ -212,7 +212,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[TMP171:%.*]] = shufflevector <4 x i8> [[TMP159]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; CHECK-NEXT:    [[TMP172:%.*]] = zext <2 x i8> [[TMP171]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP135:%.*]] = sub <2 x i32> [[TMP161]], [[TMP172]]
-; CHECK-NEXT:    [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], <i32 16, i32 16>
+; CHECK-NEXT:    [[TMP136:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP137:%.*]] = insertelement <2 x i32> [[TMP110]], i32 [[CONV33_1]], i32 1
 ; CHECK-NEXT:    [[TMP173:%.*]] = sub <2 x i32> [[TMP137]], [[TMP191]]
 ; CHECK-NEXT:    [[TMP174:%.*]] = add <2 x i32> [[TMP136]], [[TMP173]]
@@ -235,9 +235,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[SHR_I54_1:%.*]] = lshr i32 [[SUB47_1]], 15
 ; CHECK-NEXT:    [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537
 ; CHECK-NEXT:    [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535
-; CHECK-NEXT:    [[TMP194:%.*]] = lshr <2 x i32> [[TMP110]], <i32 15, i32 15>
-; CHECK-NEXT:    [[TMP154:%.*]] = and <2 x i32> [[TMP194]], <i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP195:%.*]] = mul <2 x i32> [[TMP154]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP154:%.*]] = lshr <2 x i32> [[TMP110]], splat (i32 15)
+; CHECK-NEXT:    [[TMP184:%.*]] = and <2 x i32> [[TMP154]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP195:%.*]] = mul <2 x i32> [[TMP184]], splat (i32 65535)
 ; CHECK-NEXT:    [[ADD78:%.*]] = add i32 [[ADD48_1]], [[ADD55]]
 ; CHECK-NEXT:    [[SUB86:%.*]] = sub i32 [[ADD55]], [[ADD48_1]]
 ; CHECK-NEXT:    [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]]
@@ -312,9 +312,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; CHECK-NEXT:    [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]]
 ; CHECK-NEXT:    [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_4]], [[ADD105_3]]
 ; CHECK-NEXT:    [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV1]]
-; CHECK-NEXT:    [[TMP184:%.*]] = lshr <2 x i32> [[TMP102]], <i32 15, i32 15>
-; CHECK-NEXT:    [[TMP185:%.*]] = and <2 x i32> [[TMP184]], <i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP186:%.*]] = mul <2 x i32> [[TMP185]], <i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP185:%.*]] = lshr <2 x i32> [[TMP102]], splat (i32 15)
+; CHECK-NEXT:    [[TMP193:%.*]] = and <2 x i32> [[TMP185]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP186:%.*]] = mul <2 x i32> [[TMP193]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP187:%.*]] = add <2 x i32> [[TMP186]], [[TMP183]]
 ; CHECK-NEXT:    [[TMP188:%.*]] = xor <2 x i32> [[TMP187]], [[TMP102]]
 ; CHECK-NEXT:    [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15
@@ -373,7 +373,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; THR15-NEXT:    [[TMP27:%.*]] = zext <2 x i8> [[TMP26]] to <2 x i32>
 ; THR15-NEXT:    [[TMP28:%.*]] = sub <2 x i32> [[TMP25]], [[TMP27]]
-; THR15-NEXT:    [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP29:%.*]] = shl <2 x i32> [[TMP28]], splat (i32 16)
 ; THR15-NEXT:    [[TMP59:%.*]] = add <2 x i32> [[TMP29]], [[TMP23]]
 ; THR15-NEXT:    [[TMP31:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP32:%.*]] = zext <2 x i8> [[TMP31]] to <2 x i32>
@@ -386,7 +386,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i8> [[TMP16]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP39:%.*]] = zext <2 x i8> [[TMP38]] to <2 x i32>
 ; THR15-NEXT:    [[TMP40:%.*]] = sub <2 x i32> [[TMP37]], [[TMP39]]
-; THR15-NEXT:    [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP41:%.*]] = shl <2 x i32> [[TMP40]], splat (i32 16)
 ; THR15-NEXT:    [[TMP76:%.*]] = add <2 x i32> [[TMP41]], [[TMP35]]
 ; THR15-NEXT:    [[TMP30:%.*]] = add <2 x i32> [[TMP76]], [[TMP59]]
 ; THR15-NEXT:    [[TMP42:%.*]] = sub <2 x i32> [[TMP59]], [[TMP76]]
@@ -410,7 +410,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP55:%.*]] = load <2 x i8>, ptr [[ARRAYIDX5_3]], align 1
 ; THR15-NEXT:    [[TMP56:%.*]] = zext <2 x i8> [[TMP55]] to <2 x i32>
 ; THR15-NEXT:    [[TMP57:%.*]] = sub <2 x i32> [[TMP77]], [[TMP56]]
-; THR15-NEXT:    [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP58:%.*]] = shl <2 x i32> [[TMP57]], splat (i32 16)
 ; THR15-NEXT:    [[TMP72:%.*]] = add <2 x i32> [[TMP58]], [[TMP52]]
 ; THR15-NEXT:    [[ARRAYIDX20_3:%.*]] = getelementptr i8, ptr null, i64 2
 ; THR15-NEXT:    [[ARRAYIDX22_3:%.*]] = getelementptr i8, ptr null, i64 2
@@ -425,7 +425,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP67:%.*]] = load <2 x i8>, ptr [[ARRAYIDX27_3]], align 1
 ; THR15-NEXT:    [[TMP68:%.*]] = zext <2 x i8> [[TMP67]] to <2 x i32>
 ; THR15-NEXT:    [[TMP69:%.*]] = sub <2 x i32> [[TMP66]], [[TMP68]]
-; THR15-NEXT:    [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP70:%.*]] = shl <2 x i32> [[TMP69]], splat (i32 16)
 ; THR15-NEXT:    [[TMP73:%.*]] = add <2 x i32> [[TMP70]], [[TMP64]]
 ; THR15-NEXT:    [[TMP74:%.*]] = extractelement <2 x i32> [[TMP72]], i32 0
 ; THR15-NEXT:    [[TMP75:%.*]] = extractelement <2 x i32> [[TMP72]], i32 1
@@ -487,7 +487,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP98:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; THR15-NEXT:    [[TMP99:%.*]] = zext <2 x i8> [[TMP98]] to <2 x i32>
 ; THR15-NEXT:    [[TMP100:%.*]] = sub <2 x i32> [[TMP97]], [[TMP99]]
-; THR15-NEXT:    [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP101:%.*]] = shl <2 x i32> [[TMP100]], splat (i32 16)
 ; THR15-NEXT:    [[TMP102:%.*]] = shufflevector <4 x i8> [[TMP143]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP103:%.*]] = zext <2 x i8> [[TMP102]] to <2 x i32>
 ; THR15-NEXT:    [[TMP104:%.*]] = shufflevector <4 x i8> [[TMP146]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
@@ -495,7 +495,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP106:%.*]] = shufflevector <4 x i8> [[TMP147]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP107:%.*]] = zext <2 x i8> [[TMP106]] to <2 x i32>
 ; THR15-NEXT:    [[TMP108:%.*]] = sub <2 x i32> [[TMP105]], [[TMP107]]
-; THR15-NEXT:    [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP109:%.*]] = shl <2 x i32> [[TMP108]], splat (i32 16)
 ; THR15-NEXT:    [[TMP110:%.*]] = insertelement <2 x i32> [[TMP93]], i32 [[CONV33]], i32 1
 ; THR15-NEXT:    [[TMP111:%.*]] = sub <2 x i32> [[TMP110]], [[TMP103]]
 ; THR15-NEXT:    [[TMP112:%.*]] = add <2 x i32> [[TMP109]], [[TMP111]]
@@ -531,7 +531,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP129:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> <i32 0, i32 2>
 ; THR15-NEXT:    [[TMP130:%.*]] = zext <2 x i8> [[TMP129]] to <2 x i32>
 ; THR15-NEXT:    [[TMP131:%.*]] = sub <2 x i32> [[TMP128]], [[TMP130]]
-; THR15-NEXT:    [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP132:%.*]] = shl <2 x i32> [[TMP131]], splat (i32 16)
 ; THR15-NEXT:    [[TMP138:%.*]] = shufflevector <4 x i8> [[TMP148]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP139:%.*]] = zext <2 x i8> [[TMP138]] to <2 x i32>
 ; THR15-NEXT:    [[TMP154:%.*]] = shufflevector <4 x i8> [[TMP152]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
@@ -539,7 +539,7 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[TMP133:%.*]] = shufflevector <4 x i8> [[TMP153]], <4 x i8> poison, <2 x i32> <i32 1, i32 3>
 ; THR15-NEXT:    [[TMP134:%.*]] = zext <2 x i8> [[TMP133]] to <2 x i32>
 ; THR15-NEXT:    [[TMP135:%.*]] = sub <2 x i32> [[TMP155]], [[TMP134]]
-; THR15-NEXT:    [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], <i32 16, i32 16>
+; THR15-NEXT:    [[TMP170:%.*]] = shl <2 x i32> [[TMP135]], splat (i32 16)
 ; THR15-NEXT:    [[TMP140:%.*]] = insertelement <2 x i32> [[TMP124]], i32 [[CONV33_1]], i32 1
 ; THR15-NEXT:    [[TMP141:%.*]] = sub <2 x i32> [[TMP140]], [[TMP139]]
 ; THR15-NEXT:    [[TMP171:%.*]] = add <2 x i32> [[TMP170]], [[TMP141]]
@@ -562,9 +562,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[SHR_I54_1:%.*]] = lshr i32 [[TMP151]], 15
 ; THR15-NEXT:    [[AND_I55_1:%.*]] = and i32 [[SHR_I54_1]], 65537
 ; THR15-NEXT:    [[MUL_I56_1:%.*]] = mul i32 [[AND_I55_1]], 65535
-; THR15-NEXT:    [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], <i32 15, i32 15>
-; THR15-NEXT:    [[TMP157:%.*]] = and <2 x i32> [[TMP156]], <i32 65537, i32 65537>
-; THR15-NEXT:    [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], <i32 65535, i32 65535>
+; THR15-NEXT:    [[TMP156:%.*]] = lshr <2 x i32> [[TMP124]], splat (i32 15)
+; THR15-NEXT:    [[TMP157:%.*]] = and <2 x i32> [[TMP156]], splat (i32 65537)
+; THR15-NEXT:    [[TMP158:%.*]] = mul <2 x i32> [[TMP157]], splat (i32 65535)
 ; THR15-NEXT:    [[ADD78:%.*]] = add i32 [[ADD48_2]], [[ADD48]]
 ; THR15-NEXT:    [[SUB86:%.*]] = sub i32 [[ADD48]], [[ADD48_2]]
 ; THR15-NEXT:    [[ADD103:%.*]] = add i32 [[ADD94]], [[ADD78]]
@@ -639,9 +639,9 @@ define i32 @test(ptr %pix1, ptr %pix2, i64 %idx.ext, i64 %idx.ext63, ptr %add.pt
 ; THR15-NEXT:    [[SUB106_3:%.*]] = sub i32 [[SUB86_3]], [[SUB102_3]]
 ; THR15-NEXT:    [[ADD_I52_3:%.*]] = add i32 [[MUL_I51_3]], [[ADD105_3]]
 ; THR15-NEXT:    [[XOR_I53_3:%.*]] = xor i32 [[ADD_I52_3]], [[CONV]]
-; THR15-NEXT:    [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], <i32 15, i32 15>
-; THR15-NEXT:    [[TMP180:%.*]] = and <2 x i32> [[TMP179]], <i32 65537, i32 65537>
-; THR15-NEXT:    [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], <i32 65535, i32 65535>
+; THR15-NEXT:    [[TMP179:%.*]] = lshr <2 x i32> [[TMP93]], splat (i32 15)
+; THR15-NEXT:    [[TMP180:%.*]] = and <2 x i32> [[TMP179]], splat (i32 65537)
+; THR15-NEXT:    [[TMP181:%.*]] = mul <2 x i32> [[TMP180]], splat (i32 65535)
 ; THR15-NEXT:    [[TMP182:%.*]] = add <2 x i32> [[TMP181]], [[TMP178]]
 ; THR15-NEXT:    [[TMP183:%.*]] = xor <2 x i32> [[TMP182]], [[TMP93]]
 ; THR15-NEXT:    [[SHR_I59_3:%.*]] = lshr i32 [[CONV33]], 15
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll
index 85e0b183956d86..da8cd38e3108d7 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll
@@ -62,7 +62,7 @@ define void @fp_sub(ptr %dst, ptr %p) {
 ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -70,7 +70,7 @@ define void @fp_sub(ptr %dst, ptr %p) {
 ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
 ; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -156,7 +156,7 @@ define void @fp_div(ptr %dst, ptr %p) {
 ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
+; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -164,7 +164,7 @@ define void @fp_div(ptr %dst, ptr %p) {
 ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], <float 1.050000e+01, float 1.050000e+01, float 1.050000e+01, float 1.050000e+01>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
 ; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -254,7 +254,7 @@ define void @fp_min(ptr %dst, ptr %p) {
 ; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
 ; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -262,7 +262,7 @@ define void @fp_min(ptr %dst, ptr %p) {
 ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> <float 1.250000e+00, float 1.250000e+00, float 1.250000e+00, float 1.250000e+00>)
+; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
 ; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
index 3872cb0c3a340c..fc06866c1e9553 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/getpointerschaincost.ll
@@ -12,7 +12,7 @@ define void @f(ptr %dest, i64 %i) {
 ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -46,7 +46,7 @@ define void @g(ptr %dest, i64 %i) {
 ; CHECK-SAME: (ptr [[DEST:%.*]], i64 [[I:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr i32, ptr [[DEST]], i32 2048
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -77,7 +77,7 @@ define void @h(ptr %dest, i32 %i) {
 ; CHECK-SAME: (ptr [[DEST:%.*]], i32 [[I:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr [4 x i32], ptr [[DEST]], i32 [[I]], i32 0
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[P1]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[P1]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll
index 747a1ae2b5ce4c..d9a9e9bf1a7736 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll
@@ -7,14 +7,14 @@ define void @vec_add(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_add(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], <i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1)
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_add(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], <i16 1, i16 1>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1)
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -36,14 +36,14 @@ define void @vec_sub(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_sub(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], <i16 17, i16 17>
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17)
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_sub(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], <i16 17, i16 17>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], splat (i16 17)
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -65,14 +65,14 @@ define void @vec_rsub(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_rsub(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> <i16 29, i16 29>, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]]
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_rsub(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = sub <2 x i16> <i16 29, i16 29>, [[TMP0]]
+; DEFAULT-NEXT:    [[TMP1:%.*]] = sub <2 x i16> splat (i16 29), [[TMP0]]
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -94,14 +94,14 @@ define void @vec_mul(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_mul(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], <i16 7, i16 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7)
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_mul(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], <i16 7, i16 7>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], splat (i16 7)
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -123,14 +123,14 @@ define void @vec_sdiv(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_sdiv(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], <i16 7, i16 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7)
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_sdiv(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], <i16 7, i16 7>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], splat (i16 7)
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
index 052b461b8d7501..a6c5273dc4cd0d 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll
@@ -32,14 +32,14 @@ define void @vec_add(ptr %dest, ptr %p) {
 ; CHECK-LABEL: @vec_add(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], <i16 1, i16 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1)
 ; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @vec_add(
 ; DEFAULT-NEXT:  entry:
 ; DEFAULT-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
-; DEFAULT-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], <i16 1, i16 1>
+; DEFAULT-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], splat (i16 1)
 ; DEFAULT-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
@@ -179,12 +179,12 @@ entry:
 define void @splat_store_i32_one(ptr %dest) {
 ; CHECK-LABEL: @splat_store_i32_one(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 ; DEFAULT-LABEL: @splat_store_i32_one(
 ; DEFAULT-NEXT:  entry:
-; DEFAULT-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    store <4 x i32> splat (i32 1), ptr [[DEST:%.*]], align 4
 ; DEFAULT-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll
index d6dc3bcc3354c1..ddf57994217098 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/minbw-with-and-and-scalar-trunc.ll
@@ -12,7 +12,7 @@ define i16 @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[TMP5]] to i16
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll
index 3982a4523af40e..7d7e09758b9221 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/phi-const.ll
@@ -47,7 +47,7 @@ define void @g(ptr %p, i1 %c) {
 ; CHECK-NEXT:    br label [[D]]
 ; CHECK:       d:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i8> [ <i8 1, i8 -1>, [[A]] ], [ <i8 -1, i8 1>, [[B]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 1, i8 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 1)
 ; CHECK-NEXT:    store <2 x i8> [[TMP2]], ptr [[P_0]], align 1
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
index 7771e8369b6198..c89a82dd359637 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll
@@ -6,7 +6,7 @@ define i32 @test(ptr %0, ptr %1) {
 ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> <i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> splat (i8 1))
 ; CHECK-NEXT:    [[TMP3:%.*]] = sext i8 [[TMP2]] to i32
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]]
 ; CHECK-NEXT:    ret i32 [[OP_RDX]]
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
index 9cfc5f86cb014a..063bc0458cadc6 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll
@@ -8,8 +8,8 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
-; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 65536, i32 65536, i32 65536, i32 65536>, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], splat (i16 1)
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> splat (i32 65536), <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
 ; CHECK-NEXT:    ret i32 [[TMP7]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
index 54b7b1192ec973..8db3a8b6ff2198 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/smin-signed-zextended.ll
@@ -9,11 +9,11 @@ define <4 x i32> @test(i16 %0, i16 %1) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
 ; CHECK-NEXT:    [[CONV15_I:%.*]] = sext i16 [[TMP0]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[CONV15_I]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP5]], <4 x i32> [[TMP7]])
-; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i32> [[TMP8]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i32> [[TMP8]], splat (i32 65535)
 ; CHECK-NEXT:    ret <4 x i32> [[TMP9]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll
index 3ca5733a6bad02..89539739ebcb9d 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-bv-multi-uses.ll
@@ -8,7 +8,7 @@ define i32 @test(i64 %v1, i64 %v2) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[V2]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = trunc <2 x i64> [[TMP1]] to <2 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <2 x i64> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP2]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
index 0c0c723e669961..60bd9bb124e1c2 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/trunc-to-large-than-bw.ll
@@ -9,7 +9,7 @@ define i32 @test() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i64(ptr align 8 @c, i64 24, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 4)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i64> [[TMP0]] to <4 x i16>
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[TMP1]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[TMP3]])
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext i16 [[TMP4]] to i32
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP5]], i32 1)
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
index 4f855e20fac4db..21312c28891d52 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/unsigned-node-trunc-with-signed-users.ll
@@ -11,18 +11,18 @@ define void @test(ptr %p, i16 %load794) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <2 x i16> [[TMP2]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <2 x i32> [[TMP4]], [[TMP3]]
-; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], <i32 3329, i32 3329>
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <2 x i32> [[TMP7]], splat (i32 3329)
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[ZEXT795]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i32> [[TMP8]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <2 x i32> [[TMP12]] to <2 x i64>
-; CHECK-NEXT:    [[TMP10:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], <i64 5039, i64 5039>
-; CHECK-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 24, i64 24>
+; CHECK-NEXT:    [[TMP22:%.*]] = mul nuw nsw <2 x i64> [[TMP9]], splat (i64 5039)
+; CHECK-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP22]], splat (i64 24)
 ; CHECK-NEXT:    [[TMP13:%.*]] = trunc <2 x i64> [[TMP11]] to <2 x i32>
-; CHECK-NEXT:    [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], <i32 62207, i32 62207>
+; CHECK-NEXT:    [[TMP20:%.*]] = mul <2 x i32> [[TMP13]], splat (i32 62207)
 ; CHECK-NEXT:    [[TMP21:%.*]] = add <2 x i32> [[TMP20]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = trunc <2 x i32> [[TMP21]] to <2 x i16>
-; CHECK-NEXT:    [[TMP15:%.*]] = add <2 x i16> [[TMP14]], <i16 -3329, i16 -3329>
+; CHECK-NEXT:    [[TMP15:%.*]] = add <2 x i16> [[TMP14]], splat (i16 -3329)
 ; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt <2 x i16> [[TMP15]], zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> [[TMP14]], <2 x i16> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP15]], <2 x i16> zeroinitializer)
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
index 3a1a8fb4b2e32f..073c8847b15a1c 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec15-base.ll
@@ -8,7 +8,7 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], splat (i8 10)
 ; NON-POW2-NEXT:    store <15 x i8> [[TMP1]], ptr [[DST]], align 1
 ; NON-POW2-NEXT:    ret void
 ;
@@ -17,17 +17,17 @@ define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
 ; POW2-ONLY-NEXT:  entry:
 ; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], splat (i8 10)
 ; POW2-ONLY-NEXT:    store <8 x i8> [[TMP1]], ptr [[DST]], align 1
 ; POW2-ONLY-NEXT:    [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
 ; POW2-ONLY-NEXT:    [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
-; POW2-ONLY-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
+; POW2-ONLY-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], splat (i8 10)
 ; POW2-ONLY-NEXT:    store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
 ; POW2-ONLY-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
 ; POW2-ONLY-NEXT:    [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
 ; POW2-ONLY-NEXT:    [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_SRC_12]], align 4
-; POW2-ONLY-NEXT:    [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], <i8 10, i8 10>
+; POW2-ONLY-NEXT:    [[TMP5:%.*]] = mul nsw <2 x i8> [[TMP4]], splat (i8 10)
 ; POW2-ONLY-NEXT:    store <2 x i8> [[TMP5]], ptr [[DST_12]], align 1
 ; POW2-ONLY-NEXT:    [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
 ; POW2-ONLY-NEXT:    [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
index 308d0e27f1ea89..7ab5e4d6cb787e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
@@ -7,7 +7,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10)
 ; NON-POW2-NEXT:    store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -18,7 +18,7 @@ define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
 ; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], <i32 10, i32 10>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10)
 ; POW2-ONLY-NEXT:    store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
@@ -56,7 +56,7 @@ define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = udiv <3 x i32> <i32 10, i32 10, i32 10>, [[TMP0]]
+; NON-POW2-NEXT:    [[TMP1:%.*]] = udiv <3 x i32> splat (i32 10), [[TMP0]]
 ; NON-POW2-NEXT:    store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -170,7 +170,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
 ; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
-; NON-POW2-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], <i32 9, i32 9, i32 9>
+; NON-POW2-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9)
 ; NON-POW2-NEXT:    store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -187,7 +187,7 @@ define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
 ; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
 ; POW2-ONLY-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
-; POW2-ONLY-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], <i32 9, i32 9>
+; POW2-ONLY-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9)
 ; POW2-ONLY-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store i32 [[ADD_2]], ptr [[DST_2]], align 4
@@ -231,7 +231,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
 ; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -242,7 +242,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
 ; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
@@ -467,7 +467,7 @@ define i32 @reduce_add_after_mul(ptr %src) {
 ; NON-POW2-LABEL: @reduce_add_after_mul(
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], <i32 10, i32 10, i32 10>
+; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], splat (i32 10)
 ; NON-POW2-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]])
 ; NON-POW2-NEXT:    ret i32 [[TMP3]]
 ;
@@ -798,7 +798,7 @@ define float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) {
 ; NON-POW2-NEXT:    [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1
 ; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2
-; NON-POW2-NEXT:    [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
+; NON-POW2-NEXT:    [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01)
 ; NON-POW2-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]])
 ; NON-POW2-NEXT:    ret float [[TMP5]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
index a7bb272b44dc4b..ca93cbd698adac 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-not-resized-op-resized.ll
@@ -6,7 +6,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) {
 ; CHECK-SAME: i64 [[TMP0:%.*]], i1 [[DOTCMP_I_2:%.*]], i1 [[TMP1:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], <i64 63, i64 63, i64 63, i64 63>
+; CHECK-NEXT:    [[TMP5:%.*]] = lshr <4 x i64> [[TMP4]], splat (i64 63)
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i1> poison, i1 [[DOTCMP_I_2]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i1> [[TMP6]], i1 [[TMP1]], i32 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP7]], <4 x i1> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
@@ -14,7 +14,7 @@ define void @test(i64 %0, i1 %.cmp.i.2, i1 %1, ptr %a) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP5]] to <4 x i1>
 ; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP10]], <4 x i1> [[TMP8]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = zext <4 x i1> [[TMP11]] to <4 x i32>
-; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP13]])
 ; CHECK-NEXT:    store i32 [[TMP14]], ptr [[A]], align 4
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
index 9cdb7c228e3827..9e6270376ddd43 100644
--- a/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
+++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/pr34619.ll
@@ -16,7 +16,7 @@ define void @foo() {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP2]], <2 x i32> [[TMP1]], i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> <i32 undef, i32 poison, i32 undef, i32 undef>, i32 [[ADD277]], i32 1
 ; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT:    [[TMP6:%.*]] = ashr <4 x i32> [[TMP5]], splat (i32 6)
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr [[ARRAYIDX372]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
index d8845f20ecd72e..5ebbb76f3d6c33 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35628_2.ll
@@ -16,8 +16,8 @@ define void @test() #0 {
 ; CHECK-NEXT:    [[DUMMY_ADD:%.*]] = add i16 0, 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i64> [[TMP3]], i32 0
 ; CHECK-NEXT:    [[DUMMY_SHL:%.*]] = shl i64 [[TMP5]], 32
-; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> <i64 1, i64 1, i64 1, i64 1>, [[TMP3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], <i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i64> splat (i64 1), [[TMP3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = ashr exact <4 x i64> [[TMP6]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP7]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = add i64 [[TMP8]], [[TMP4]]
 ; CHECK-NEXT:    [[OP_RDX1]] = add i64 [[OP_RDX]], 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
index 8e878f3f8b80fa..8839fc22817888 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
@@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
 ; SSE-LABEL: @ashr_shl_v8i32_const(
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SSE-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
 ; SSE-NEXT:    [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SSE-NEXT:    ret <8 x i32> [[R71]]
 ;
 ; SLM-LABEL: @ashr_shl_v8i32_const(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SLM-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
 ; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SLM-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
 ; SLM-NEXT:    [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SLM-NEXT:    ret <8 x i32> [[R71]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
index 0b17e19e4fadd8..dfa918a6ea4532 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -171,17 +171,17 @@ define <8 x i32> @ashr_shl_v8i32(<8 x i32> %a, <8 x i32> %b) {
 define <8 x i32> @ashr_shl_v8i32_const(<8 x i32> %a) {
 ; SSE-LABEL: @ashr_shl_v8i32_const(
 ; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SSE-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
 ; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SSE-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SSE-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
 ; SSE-NEXT:    [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SSE-NEXT:    ret <8 x i32> [[R71]]
 ;
 ; SLM-LABEL: @ashr_shl_v8i32_const(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2>
+; SLM-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 2)
 ; SLM-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], <i32 3, i32 3, i32 3, i32 3>
+; SLM-NEXT:    [[TMP4:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 3)
 ; SLM-NEXT:    [[R71:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; SLM-NEXT:    ret <8 x i32> [[R71]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
index 2c16612722c6a7..6187dd9ea910d8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
@@ -15,7 +15,7 @@ define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca
 ; CHECK-LABEL: @and4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i8> [[TMP0]], splat (i8 -64)
 ; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -45,7 +45,7 @@ define void @and8(ptr noalias nocapture noundef writeonly %dst, ptr noalias noca
 ; CHECK-LABEL: @and8(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i8> [[TMP0]], splat (i8 -64)
 ; CHECK-NEXT:    store <8 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -95,7 +95,7 @@ define void @and16(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc
 ; CHECK-LABEL: @and16(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
-; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64)
 ; CHECK-NEXT:    store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -185,19 +185,19 @@ define void @and32(ptr noalias nocapture noundef writeonly %dst, ptr noalias noc
 ; SSE-LABEL: @and32(
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 1
-; SSE-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
+; SSE-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[TMP0]], splat (i8 -64)
 ; SSE-NEXT:    store <16 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; SSE-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 16
 ; SSE-NEXT:    [[ARRAYIDX3_16:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 16
 ; SSE-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX_16]], align 1
-; SSE-NEXT:    [[TMP3:%.*]] = and <16 x i8> [[TMP2]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
+; SSE-NEXT:    [[TMP3:%.*]] = and <16 x i8> [[TMP2]], splat (i8 -64)
 ; SSE-NEXT:    store <16 x i8> [[TMP3]], ptr [[ARRAYIDX3_16]], align 1
 ; SSE-NEXT:    ret void
 ;
 ; AVX-LABEL: @and32(
 ; AVX-NEXT:  entry:
 ; AVX-NEXT:    [[TMP0:%.*]] = load <32 x i8>, ptr [[SRC:%.*]], align 1
-; AVX-NEXT:    [[TMP1:%.*]] = and <32 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64, i8 -64>
+; AVX-NEXT:    [[TMP1:%.*]] = and <32 x i8> [[TMP0]], splat (i8 -64)
 ; AVX-NEXT:    store <32 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; AVX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
index 33fd3e6dc0e09f..25652e026a8374 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
@@ -25,46 +25,46 @@
 define void @sdiv_v16i32_uniformconst() {
 ; SSE-LABEL: @sdiv_v16i32_uniformconst(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SSE-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @sdiv_v16i32_uniformconst(
 ; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SLM-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP2:%.*]] = sdiv <4 x i32> [[TMP1]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP6:%.*]] = sdiv <4 x i32> [[TMP5]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP8:%.*]] = sdiv <4 x i32> [[TMP7]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @sdiv_v16i32_uniformconst(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
-; AVX-NEXT:    [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP2:%.*]] = sdiv <8 x i32> [[TMP1]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP4:%.*]] = sdiv <8 x i32> [[TMP3]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @sdiv_v16i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:    [[TMP2:%.*]] = sdiv <16 x i32> [[TMP1]], splat (i32 5)
 ; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
@@ -122,46 +122,46 @@ define void @sdiv_v16i32_uniformconst() {
 define void @srem_v16i32_uniformconst() {
 ; SSE-LABEL: @srem_v16i32_uniformconst(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SSE-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @srem_v16i32_uniformconst(
 ; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SLM-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP2:%.*]] = srem <4 x i32> [[TMP1]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP4:%.*]] = srem <4 x i32> [[TMP3]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP6:%.*]] = srem <4 x i32> [[TMP5]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP8:%.*]] = srem <4 x i32> [[TMP7]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @srem_v16i32_uniformconst(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
-; AVX-NEXT:    [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP2:%.*]] = srem <8 x i32> [[TMP1]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP4:%.*]] = srem <8 x i32> [[TMP3]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @srem_v16i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:    [[TMP2:%.*]] = srem <16 x i32> [[TMP1]], splat (i32 5)
 ; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
@@ -219,46 +219,46 @@ define void @srem_v16i32_uniformconst() {
 define void @udiv_v16i32_uniformconst() {
 ; SSE-LABEL: @udiv_v16i32_uniformconst(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SSE-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @udiv_v16i32_uniformconst(
 ; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SLM-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP2:%.*]] = udiv <4 x i32> [[TMP1]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP4:%.*]] = udiv <4 x i32> [[TMP3]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP6:%.*]] = udiv <4 x i32> [[TMP5]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP8:%.*]] = udiv <4 x i32> [[TMP7]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @udiv_v16i32_uniformconst(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
-; AVX-NEXT:    [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP2:%.*]] = udiv <8 x i32> [[TMP1]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP4:%.*]] = udiv <8 x i32> [[TMP3]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @udiv_v16i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:    [[TMP2:%.*]] = udiv <16 x i32> [[TMP1]], splat (i32 5)
 ; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
@@ -316,46 +316,46 @@ define void @udiv_v16i32_uniformconst() {
 define void @urem_v16i32_uniformconst() {
 ; SSE-LABEL: @urem_v16i32_uniformconst(
 ; SSE-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SSE-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SSE-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SSE-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SSE-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SSE-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SSE-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5)
 ; SSE-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SSE-NEXT:    ret void
 ;
 ; SLM-LABEL: @urem_v16i32_uniformconst(
 ; SLM-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @a32, align 4
-; SLM-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP2:%.*]] = urem <4 x i32> [[TMP1]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP2]], ptr @c32, align 4
 ; SLM-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 4), align 4
-; SLM-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP4:%.*]] = urem <4 x i32> [[TMP3]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 4), align 4
 ; SLM-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; SLM-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP6:%.*]] = urem <4 x i32> [[TMP5]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP6]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; SLM-NEXT:    [[TMP7:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 12), align 4
-; SLM-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], <i32 5, i32 5, i32 5, i32 5>
+; SLM-NEXT:    [[TMP8:%.*]] = urem <4 x i32> [[TMP7]], splat (i32 5)
 ; SLM-NEXT:    store <4 x i32> [[TMP8]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 12), align 4
 ; SLM-NEXT:    ret void
 ;
 ; AVX-LABEL: @urem_v16i32_uniformconst(
 ; AVX-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr @a32, align 4
-; AVX-NEXT:    [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP2:%.*]] = urem <8 x i32> [[TMP1]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr getelementptr inbounds ([16 x i32], ptr @a32, i32 0, i64 8), align 4
-; AVX-NEXT:    [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX-NEXT:    [[TMP4:%.*]] = urem <8 x i32> [[TMP3]], splat (i32 5)
 ; AVX-NEXT:    store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([16 x i32], ptr @c32, i32 0, i64 8), align 4
 ; AVX-NEXT:    ret void
 ;
 ; AVX512-LABEL: @urem_v16i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+; AVX512-NEXT:    [[TMP2:%.*]] = urem <16 x i32> [[TMP1]], splat (i32 5)
 ; AVX512-NEXT:    store <16 x i32> [[TMP2]], ptr @c32, align 4
 ; AVX512-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
index 16977c025e3eaa..45294e581e6aea 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
@@ -854,13 +854,13 @@ define void @fshl_v2i32_uniformconst() {
 ;
 ; AVX256-LABEL: @fshl_v2i32_uniformconst(
 ; AVX256-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; AVX256-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; AVX256-NEXT:    store <2 x i32> [[TMP2]], ptr @d32, align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fshl_v2i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; AVX512-NEXT:    store <2 x i32> [[TMP2]], ptr @d32, align 4
 ; AVX512-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
index 090a9daa6a1136..d2002b4eedaf40 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
@@ -854,13 +854,13 @@ define void @fshr_v2i32_uniformconst() {
 ;
 ; AVX256-LABEL: @fshr_v2i32_uniformconst(
 ; AVX256-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
-; AVX256-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; AVX256-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; AVX256-NEXT:    store <2 x i32> [[TMP2]], ptr @d32, align 4
 ; AVX256-NEXT:    ret void
 ;
 ; AVX512-LABEL: @fshr_v2i32_uniformconst(
 ; AVX512-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @a32, align 4
-; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; AVX512-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> splat (i32 1))
 ; AVX512-NEXT:    store <2 x i32> [[TMP2]], ptr @d32, align 4
 ; AVX512-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
index d388fd17925a16..f46a5d84a86cc9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
@@ -13,7 +13,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
index 4af23a6c965100..f1b094e9bbed4a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/c-ray.ll
@@ -80,7 +80,7 @@ define i32 @ray_sphere(ptr nocapture noundef readonly %sph, ptr nocapture nounde
 ; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[CMP93]], i1 [[CMP94]], i1 false
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[CLEANUP]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
-; CHECK-NEXT:    [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP36:%.*]] = fcmp ule <2 x double> [[TMP33]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <2 x i1> [[TMP36]], i32 0
 ; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <2 x i1> [[TMP36]], i32 1
 ; CHECK-NEXT:    [[OR_COND106:%.*]] = select i1 [[TMP38]], i1 true, i1 [[TMP37]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll
index e8548e0467381e..860d0ed29332c2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cast-operand-extracted.ll
@@ -17,7 +17,7 @@ define void @test(ptr %0, i32 %add651) {
 ; CHECK-NEXT:    [[ARRAYIDX660:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7800
 ; CHECK-NEXT:    [[ARRAYIDX689:%.*]] = getelementptr i8, ptr [[TMP4]], i64 7816
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP3]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP3]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 2>
 ; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> <i32 1, i32 poison>, <2 x i32> <i32 2, i32 1>
@@ -25,7 +25,7 @@ define void @test(ptr %0, i32 %add651) {
 ; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> poison, i32 [[ADD651]], i32 0
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP2]], i32 1
 ; CHECK-NEXT:    [[TMP14:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP13]], <2 x i32> [[TMP10]], i64 2)
-; CHECK-NEXT:    [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP15:%.*]] = lshr <4 x i32> [[TMP14]], splat (i32 1)
 ; CHECK-NEXT:    [[SHR685:%.*]] = lshr i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i16>
 ; CHECK-NEXT:    [[CONV686:%.*]] = trunc i32 [[SHR685]] to i16
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
index 0b9a053d49204c..926cc7b906efe1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll
@@ -14,7 +14,7 @@ define i32 @foo(ptr noalias nocapture %A, ptr noalias nocapture %B, double %G) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = fcmp une <2 x double> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[G:%.*]], i32 0
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> [[TMP2]], <2 x double> [[SHUFFLE]], <2 x double> splat (double 1.000000e+00)
 ; CHECK-NEXT:    store <2 x double> [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
index 133e64e5bd00d8..54a81a3ce723cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll
@@ -12,7 +12,7 @@ define void @foo(ptr %v0, ptr readonly %v1) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[T14]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], <i32 4, i32 4, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i64>, ptr [[T142]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <2 x i64> [[TMP3]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[T222]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i64> [[TMP5]], <i64 6, i64 7>
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[T212]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
index ae6e6723706cd9..0b6c8f3d2562b3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/compare-reduce.ll
@@ -82,7 +82,7 @@ define float @merge_anyof_v4f32_wrong_first(<4 x float> %x) {
 ; CHECK-LABEL: @merge_anyof_v4f32_wrong_first(
 ; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
 ; CHECK-NEXT:    [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00
@@ -109,7 +109,7 @@ define float @merge_anyof_v4f32_wrong_last(<4 x float> %x) {
 ; CHECK-LABEL: @merge_anyof_v4f32_wrong_last(
 ; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x float> [[X:%.*]], i32 3
 ; CHECK-NEXT:    [[CMP3WRONG:%.*]] = fcmp olt float [[X3]], 4.200000e+01
-; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt <4 x float> [[X]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[OP_RDX]], float -1.000000e+00, float 1.000000e+00
@@ -136,7 +136,7 @@ define i32 @merge_anyof_v4i32_wrong_middle(<4 x i32> %x) {
 ; CHECK-LABEL: @merge_anyof_v4i32_wrong_middle(
 ; CHECK-NEXT:    [[X3:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 3
 ; CHECK-NEXT:    [[CMP3WRONG:%.*]] = icmp slt i32 [[X3]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP2]], [[CMP3WRONG]]
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[OP_RDX]], i32 -1, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
index 4eb0be61b51c41..925b348cdeec1f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet3.ll
@@ -24,11 +24,11 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve
 ; CHECK:       for.body233:
 ; CHECK-NEXT:    br i1 undef, label [[FOR_BODY233]], label [[FOR_END271]]
 ; CHECK:       for.end271:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ <float 0x47EFFFFFE0000000, float 0x47EFFFFFE0000000>, [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x float> [ splat (float 0x47EFFFFFE0000000), [[FOR_END227]] ], [ undef, [[FOR_BODY233]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub <2 x float> undef, [[TMP0]]
 ; CHECK-NEXT:    br i1 undef, label [[IF_THEN291:%.*]], label [[RETURN]]
 ; CHECK:       if.then291:
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 5.000000e-01, float 5.000000e-01>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 5.000000e-01)
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x float> [[TMP0]], [[TMP2]]
 ; CHECK-NEXT:    br i1 undef, label [[IF_END332:%.*]], label [[IF_ELSE319:%.*]]
 ; CHECK:       if.else319:
@@ -40,7 +40,7 @@ define void @_ZN11HullLibrary15CleanupVerticesEjPK9btVector3jRjPS0_fRS0_(ptr %ve
 ; CHECK:       if.then329:
 ; CHECK-NEXT:    br label [[IF_END332]]
 ; CHECK:       if.end332:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ <float 0x3F847AE140000000, float 0x3F847AE140000000>, [[IF_THEN291]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP1]], [[IF_THEN329]] ], [ [[TMP1]], [[IF_END327]] ], [ splat (float 0x3F847AE140000000), [[IF_THEN291]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = fsub <2 x float> [[TMP3]], [[TMP4]]
 ; CHECK-NEXT:    store <2 x float> [[TMP5]], ptr [[VERTICES:%.*]], align 4
 ; CHECK-NEXT:    br label [[RETURN]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
index 849c1d7c192dce..988e8985b9f151 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_cmpop.ll
@@ -24,20 +24,20 @@ define void @testfunc(ptr nocapture %dest, ptr nocapture readonly %src) {
 ; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x float> [[TMP4]], [[SHUFFLE1]]
-; CHECK-NEXT:    [[TMP6:%.*]] = fcmp olt <2 x float> [[TMP5]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP5]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP7]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP8:%.*]] = fcmp olt <2 x float> [[TMP5]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP8]], <2 x float> [[TMP5]], <2 x float> splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP7]], splat (float -1.000000e+00)
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x float> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x float> <float -0.000000e+00, float -0.000000e+00>, <2 x float> [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP15]], <2 x float> splat (float -0.000000e+00), <2 x float> [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
 ; CHECK-NEXT:    [[ADD13]] = fadd float [[TMP11]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> poison, <2 x i32> <i32 1, i32 poison>
 ; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[ADD13]], i32 1
-; CHECK-NEXT:    [[TMP15:%.*]] = fcmp olt <2 x float> [[TMP14]], <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x float> [[TMP14]], <2 x float> <float 1.000000e+00, float 1.000000e+00>
-; CHECK-NEXT:    [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP16]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT:    [[TMP18]] = select <2 x i1> [[TMP17]], <2 x float> <float -1.000000e+00, float -1.000000e+00>, <2 x float> [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = fcmp olt <2 x float> [[TMP14]], splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP17]], <2 x float> [[TMP14]], <2 x float> splat (float 1.000000e+00)
+; CHECK-NEXT:    [[TMP19:%.*]] = fcmp olt <2 x float> [[TMP20]], splat (float -1.000000e+00)
+; CHECK-NEXT:    [[TMP18]] = select <2 x i1> [[TMP19]], <2 x float> splat (float -1.000000e+00), <2 x float> [[TMP20]]
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.end:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 2f69a01bb5e370..95ae544e2c62f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04)
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 9b35fcaebadf95..1e31772b8e49ed 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -15,7 +15,7 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[MUL20]], 8.192000e+03
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[P2:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[P1:%.*]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.638400e+04, double 1.638400e+04>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.638400e+04)
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> <double poison, double 0.000000e+00>, double [[ADD]], i32 0
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
index 1c0fa1d85f9e4a..a9f92f324d6f54 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_sim4b1.ll
@@ -67,7 +67,7 @@ define void @SIM4() {
 ; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x i32> [ [[TMP3]], [[IF_THEN_I]] ], [ undef, [[WHILE_END275_I]] ]
 ; CHECK-NEXT:    br i1 false, label [[IF_THEN157:%.*]], label [[LAND_LHS_TRUE167]]
 ; CHECK:       if.then157:
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i32> [[TMP4]], splat (i32 1)
 ; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr undef, align 4
 ; CHECK-NEXT:    br label [[LAND_LHS_TRUE167]]
 ; CHECK:       land.lhs.true167:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index 1f33676c19c148..42ad20ff578c10 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -27,7 +27,7 @@ define void @main() {
 ; CHECK-NEXT:    [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], <double 1.400000e+02, double 1.400000e+02>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02)
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
 ; CHECK-NEXT:    store <2 x double> [[TMP3]], ptr undef, align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
index 9c8569fcbdf759..739e3964c2685f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cse.ll
@@ -179,11 +179,11 @@ define i32 @foo4(ptr nocapture %A, i32 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[N:%.*]] to double
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], <double 7.900000e+00, double 7.900000e+00, double 7.900000e+00, double 7.900000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x double> [[TMP0]], splat (double 7.900000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> poison, double [[CONV]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], splat (double 6.000000e+00)
 ; CHECK-NEXT:    store <4 x double> [[TMP5]], ptr [[A]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
@@ -335,8 +335,8 @@ define void @cse_for_hoisted_instructions_in_preheader(ptr %dst, i32 %a, i1 %c)
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> <i32 22, i32 22>, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP2]], <i32 3, i32 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> splat (i32 22), [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = or <2 x i32> [[TMP2]], splat (i32 3)
 ; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
 ; CHECK-NEXT:    [[OR_2:%.*]] = or i32 [[A]], 3
 ; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 10
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll
index d203888e0c6ee8..b42f36f105636e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug-counter.ll
@@ -25,12 +25,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
 ; COUNT0-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 0>
 ; COUNT0-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]]
 ; COUNT0-NEXT:    [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer
-; COUNT0-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; COUNT0-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
 ; COUNT0-NEXT:    [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer
 ; COUNT0-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
 ; COUNT0-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]]
 ; COUNT0-NEXT:    [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer
-; COUNT0-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> zeroinitializer
+; COUNT0-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
 ; COUNT0-NEXT:    store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8
 ; COUNT0-NEXT:    ret void
 ;
@@ -135,12 +135,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
 ; COUNT-1-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 0>
 ; COUNT-1-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]]
 ; COUNT-1-NEXT:    [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer
-; COUNT-1-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; COUNT-1-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
 ; COUNT-1-NEXT:    [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer
 ; COUNT-1-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
 ; COUNT-1-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]]
 ; COUNT-1-NEXT:    [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer
-; COUNT-1-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> zeroinitializer
+; COUNT-1-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
 ; COUNT-1-NEXT:    store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8
 ; COUNT-1-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
index feeb34e2f1145b..22c1d7ce388ab5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/different-vec-widths.ll
@@ -17,13 +17,13 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re
 ; SSE-NEXT:    [[Q2:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 2
 ; SSE-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q]], i64 4
 ; SSE-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[P]], align 8
-; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00>
+; SSE-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+00)
 ; SSE-NEXT:    store <2 x double> [[TMP3]], ptr [[Q]], align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[P2]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
+; SSE-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00)
 ; SSE-NEXT:    store <2 x double> [[TMP7]], ptr [[Q2]], align 8
 ; SSE-NEXT:    [[TMP10:%.*]] = load <2 x double>, ptr [[P4]], align 8
-; SSE-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 1.000000e+00>
+; SSE-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00)
 ; SSE-NEXT:    store <2 x double> [[TMP11]], ptr [[Q4]], align 8
 ; SSE-NEXT:    ret void
 ;
@@ -31,10 +31,10 @@ define void @PR28457(ptr noalias nocapture align 32 %q, ptr noalias nocapture re
 ; AVX-NEXT:    [[P4:%.*]] = getelementptr inbounds double, ptr [[P:%.*]], i64 4
 ; AVX-NEXT:    [[Q4:%.*]] = getelementptr inbounds double, ptr [[Q:%.*]], i64 4
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[P]], align 8
-; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; AVX-NEXT:    [[TMP3:%.*]] = fadd <4 x double> [[TMP2]], splat (double 1.000000e+00)
 ; AVX-NEXT:    store <4 x double> [[TMP3]], ptr [[Q]], align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = load <2 x double>, ptr [[P4]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
+; AVX-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00)
 ; AVX-NEXT:    store <2 x double> [[TMP7]], ptr [[Q4]], align 8
 ; AVX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
index 3d3d00f4a0b3fd..fb25ff975adc2e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external-used-across-reductions.ll
@@ -12,8 +12,8 @@ define void @test() {
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[PHI1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX25:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <8 x i64> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <8 x i64> [[TMP6]], splat (i64 4)
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP7]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
 ; CHECK-NEXT:    [[OP_RDX16:%.*]] = add i64 [[TMP9]], [[TMP8]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
index 597ccd712e0a69..92157b24c6bb05 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -31,9 +31,9 @@ define double @ext_user(ptr noalias nocapture %B, ptr noalias nocapture %A, i32
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 1.000000e+01, double 1.000000e+01>
-; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
-; CHECK-NEXT:    [[TMP5]] = fadd <2 x double> [[TMP4]], <double 4.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], splat (double 1.000000e+01)
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], splat (double 4.000000e+00)
+; CHECK-NEXT:    [[TMP5]] = fadd <2 x double> [[TMP4]], splat (double 4.000000e+00)
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_020]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
index 1374e9873e1c53..02c3173adc654f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll
@@ -12,7 +12,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[EXTERNALUSE1:%.*]] = add nsw i32 [[TMP6]], [[M:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
index 757d0b1708b6fb..d372b47c439d2d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-node-same-as-vect-but-order.ll
@@ -42,7 +42,7 @@ define void @foo(ptr %i7, i32 %0, i1 %tobool62.not) {
 ; CHECK-NEXT:    br label [[IF_END75]]
 ; CHECK:       if.end75:
 ; CHECK-NEXT:    [[TMP22:%.*]] = phi <4 x i32> [ [[TMP20]], [[IF_THEN74]] ], [ [[TMP21]], [[IF_END72]] ]
-; CHECK-NEXT:    [[TMP23:%.*]] = or <4 x i32> [[TMP22]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP23:%.*]] = or <4 x i32> [[TMP22]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP25:%.*]] = mul <4 x i32> [[TMP23]], [[TMP24]]
 ; CHECK-NEXT:    [[TMP26:%.*]] = sitofp <4 x i32> [[TMP25]] to <4 x float>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll
index 7e75970de34929..3ac0d01cf9a2c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll
@@ -12,7 +12,7 @@ define i1 @test(i32 %g, i16 %d) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8>
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i8> [[TMP5]], <i8 -9, i8 -9, i8 -1, i8 -1>
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], <i8 -3, i8 -3, i8 -3, i8 -3>
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], splat (i8 -3)
 ; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
index 17f5abada632d8..1514fcec02bd9c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -14,7 +14,7 @@ define void @foo1 (ptr noalias %x, ptr noalias %y) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[Y:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds { ptr, ptr }, ptr [[X:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x ptr>, ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> <i64 16, i64 16>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP4]], <2 x i64> splat (i64 16)
 ; CHECK-NEXT:    store <2 x ptr> [[TMP5]], ptr [[TMP2]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
index e0d7c12f70c2ef..63b41627106e5c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll
@@ -18,7 +18,7 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
 ; CHECK-NEXT:    [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 1)
 ; CHECK-NEXT:    switch i32 [[TMP4]], label [[WHILE_BODY_BACKEDGE]] [
 ; CHECK-NEXT:      i32 2, label [[SW_BB:%.*]]
 ; CHECK-NEXT:      i32 4, label [[SW_BB6:%.*]]
@@ -29,13 +29,13 @@ define dso_local i32 @g() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
 ; CHECK-NEXT:    store i32 [[TMP8]], ptr [[INCDEC_PTR1]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2)
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
 ; CHECK:       sw.bb6:
 ; CHECK-NEXT:    [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[A_020]], i64 2
 ; CHECK-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> splat (i64 2)
 ; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0
 ; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT:    br label [[WHILE_BODY_BACKEDGE]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
index 72e29839230e81..60e30b6735e534 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-list.ll
@@ -34,7 +34,7 @@ define float @baz() {
 ; THRESHOLD-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
 ; THRESHOLD-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
 ; THRESHOLD-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[CONV]], i32 1
-; THRESHOLD-NEXT:    [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], <float 2.000000e+00, float 2.000000e+00>
+; THRESHOLD-NEXT:    [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], splat (float 2.000000e+00)
 ; THRESHOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
 ; THRESHOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
 ; THRESHOLD-NEXT:    [[OP_RDX:%.*]] = fadd fast float [[TMP8]], [[TMP9]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
index cf9f7f7376ba44..990803870ec23f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal.ll
@@ -29,7 +29,7 @@ define i32 @add_red(ptr %A, i32 %n) {
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i64 [[I_033]], 2
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[MUL]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], <float 7.000000e+00, float 7.000000e+00, float 7.000000e+00, float 7.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP1]], splat (float 7.000000e+00)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP2]])
 ; CHECK-NEXT:    [[ADD17]] = fadd fast float [[SUM_032]], [[TMP3]]
 ; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_033]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
index 3a35a63c159815..f0a1836e6c273f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll
@@ -320,52 +320,52 @@ define i32 @foo1() local_unnamed_addr #0 {
 ; SSE-LABEL: @foo1(
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @ib, align 16
-; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP1]], ptr @ia, align 16
 ; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 4), align 16
-; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 4), align 16
 ; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 8), align 16
-; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 8), align 16
 ; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 12), align 16
-; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 12), align 16
 ; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16
-; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP9]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16
 ; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 20), align 16
-; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP11]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 20), align 16
 ; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 24), align 16
-; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP13]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 24), align 16
 ; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 28), align 16
-; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP15]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 28), align 16
 ; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16
-; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP17]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16
 ; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 36), align 16
-; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP19]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 36), align 16
 ; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 40), align 16
-; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP21]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 40), align 16
 ; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 44), align 16
-; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP23]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 44), align 16
 ; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16
-; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP25]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16
 ; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 52), align 16
-; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP27]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 52), align 16
 ; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 56), align 16
-; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP29]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 56), align 16
 ; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 60), align 16
-; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], splat (i32 -1)
 ; SSE-NEXT:    store <4 x i32> [[TMP31]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 60), align 16
 ; SSE-NEXT:    br label [[FOR_BODY5:%.*]]
 ; SSE:       for.cond3:
@@ -390,16 +390,16 @@ define i32 @foo1() local_unnamed_addr #0 {
 ; AVX512-LABEL: @foo1(
 ; AVX512-NEXT:  entry:
 ; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, ptr @ib, align 16
-; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], splat (i32 -1)
 ; AVX512-NEXT:    store <16 x i32> [[TMP1]], ptr @ia, align 16
 ; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 16), align 16
-; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], splat (i32 -1)
 ; AVX512-NEXT:    store <16 x i32> [[TMP3]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 16), align 16
 ; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 32), align 16
-; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], splat (i32 -1)
 ; AVX512-NEXT:    store <16 x i32> [[TMP5]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 32), align 16
 ; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, ptr getelementptr inbounds ([64 x i32], ptr @ib, i64 0, i64 48), align 16
-; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], splat (i32 -1)
 ; AVX512-NEXT:    store <16 x i32> [[TMP7]], ptr getelementptr inbounds ([64 x i32], ptr @ia, i64 0, i64 48), align 16
 ; AVX512-NEXT:    br label [[FOR_BODY5:%.*]]
 ; AVX512:       for.cond3:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
index 252da8d7fa7a72..b49d99f353b822 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/long_chains.ll
@@ -10,18 +10,18 @@ define i32 @test(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], <i8 3, i8 3>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i8> [[TMP1]], splat (i8 3)
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <2 x i8> [[TMP2]] to <2 x double>
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    store <2 x double> [[TMP13]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    ret i32 undef
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
index 3d12da79ebf952..fd307cc86c7ac9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -22,9 +22,9 @@ define i32 @bar() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <16 x i32> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub nsw <16 x i32> [[TMP5]], [[TMP8]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> [[TMP10]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = and <16 x i32> [[TMP12]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP12:%.*]] = lshr <16 x i32> [[TMP11]], splat (i32 15)
+; CHECK-NEXT:    [[TMP13:%.*]] = and <16 x i32> [[TMP12]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw <16 x i32> [[TMP13]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP15:%.*]] = add <16 x i32> [[TMP14]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = xor <16 x i32> [[TMP15]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP16]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
index e5d7ad138b4def..166c819098c8c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matching-gather-nodes-phi-users.ll
@@ -24,7 +24,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) {
 ; CHECK-NEXT:  Entry:
 ; CHECK-NEXT:    br i1 poison, label [[LOOP0:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop0:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x float> [ <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[USERBLOCK1:%.*]] ]
 ; CHECK-NEXT:    br i1 poison, label [[USERBLOCK0:%.*]], label [[BLKX:%.*]]
 ; CHECK:       UserBlock0:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <4 x float> [ zeroinitializer, [[LOOP0]] ], [ [[TMP5:%.*]], [[BLKX]] ]
@@ -48,7 +48,7 @@ define void @test(ptr %dst, float %a, float %b, float %c, float %d) {
 ; CHECK-NEXT:    [[TMP9]] = fadd fast <4 x float> [[TMP8]], poison
 ; CHECK-NEXT:    br i1 poison, label [[USERBLOCK1]], label [[LOOP_INNER]]
 ; CHECK:       Exit:
-; CHECK-NEXT:    [[TMP10:%.*]] = phi <4 x float> [ <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>, [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = phi <4 x float> [ splat (float 5.000000e-01), [[ENTRY]] ], [ [[TMP7]], [[USERBLOCK1]] ]
 ; CHECK-NEXT:    [[IDX0:%.*]] = add i64 0, poison
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IDX0]]
 ; CHECK-NEXT:    store <4 x float> [[TMP10]], ptr [[GEP0]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll
index e1fd8a7ec88afc..f7175692c97568 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll
@@ -20,15 +20,15 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> <i24 23, i24 23>, <2 x i24> [[TMP8]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8>
 ; CHECK-NEXT:    [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32>
-; CHECK-NEXT:    [[TMP12:%.*]] = and <2 x i32> [[TMP26]], <i32 254, i32 254>
-; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], <i32 4, i32 4>
-; CHECK-NEXT:    [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> <i8 2, i8 2>, <2 x i8> [[TMP23]]
+; CHECK-NEXT:    [[TMP13:%.*]] = and <2 x i32> [[TMP26]], splat (i32 254)
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP13]], splat (i32 4)
+; CHECK-NEXT:    [[TMP25:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> splat (i8 2), <2 x i8> [[TMP23]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32>
-; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], <i32 32, i32 32>
-; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP15]], <2 x i8> <i8 31, i8 31>, <2 x i8> [[TMP25]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP14]], splat (i32 32)
+; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> splat (i8 31), <2 x i8> [[TMP25]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = zext <2 x i8> [[TMP18]] to <2 x i32>
-; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq <2 x i32> [[TMP16]], <i32 54, i32 54>
-; CHECK-NEXT:    [[TMP21:%.*]] = select <2 x i1> [[TMP17]], <2 x i8> <i8 53, i8 53>, <2 x i8> [[TMP18]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq <2 x i32> [[TMP16]], splat (i32 54)
+; CHECK-NEXT:    [[TMP21:%.*]] = select <2 x i1> [[TMP27]], <2 x i8> splat (i8 53), <2 x i8> [[TMP18]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i8> [[TMP21]], i32 0
 ; CHECK-NEXT:    [[TMP19:%.*]] = zext i8 [[TMP22]] to i32
 ; CHECK-NEXT:    store i32 [[TMP19]], ptr [[P1]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
index a316415dcc6b52..b4cbdfa10460f6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minimum-sizes.ll
@@ -19,7 +19,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
 ; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
-; SSE-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
+; SSE-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; SSE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; SSE-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
 ; SSE-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
@@ -35,7 +35,7 @@ define i8 @PR31243_zext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; AVX-NEXT:  entry:
 ; AVX-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
-; AVX-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
+; AVX-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; AVX-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i64
 ; AVX-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
@@ -78,7 +78,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; SSE-NEXT:  entry:
 ; SSE-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
 ; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
-; SSE-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
+; SSE-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; SSE-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; SSE-NEXT:    [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
 ; SSE-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
@@ -94,7 +94,7 @@ define i8 @PR31243_sext(i8 %v0, i8 %v1, i8 %v2, i8 %v3, ptr %ptr) {
 ; AVX-NEXT:  entry:
 ; AVX-NEXT:    [[TMP0:%.*]] = insertelement <2 x i8> poison, i8 [[V0:%.*]], i64 0
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i8> [[TMP0]], i8 [[V1:%.*]], i64 1
-; AVX-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], <i8 1, i8 1>
+; AVX-NEXT:    [[TMP2:%.*]] = or <2 x i8> [[TMP1]], splat (i8 1)
 ; AVX-NEXT:    [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i64 0
 ; AVX-NEXT:    [[TMP4:%.*]] = sext i8 [[TMP3]] to i64
 ; AVX-NEXT:    [[T4:%.*]] = getelementptr inbounds i8, ptr [[PTR:%.*]], i64 [[TMP4]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll
index 4b5b5da21cd21d..35625d70a26671 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/mul64.ll
@@ -9,8 +9,8 @@ define void @PR62969(ptr dereferenceable(16) %out, ptr dereferenceable(16) %in)
 ; CHECK-NEXT:    [[IN0:%.*]] = getelementptr inbounds [2 x i64], ptr [[IN:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[OUT0:%.*]] = getelementptr inbounds [2 x i64], ptr [[OUT:%.*]], i64 0, i64 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[IN0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], <i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 32, i64 32>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP1]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 32)
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul <2 x i64> [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    store <2 x i64> [[TMP4]], ptr [[OUT0]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
index 47dd84c7f6e9cc..feecdaf55597ff 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-order-detection.ll
@@ -22,7 +22,7 @@ define void @e(ptr %c, i64 %0) {
 ; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <32 x i64> poison, i64 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <32 x i64> [[TMP13]], <32 x i64> poison, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP15:%.*]] = or <32 x i64> [[TMP14]], [[TMP12]]
-; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult <32 x i64> [[TMP15]], splat (i64 16)
 ; CHECK-NEXT:    [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP16]])
 ; CHECK-NEXT:    br i1 [[TMP17]], label %[[FOR_BODY:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
index ec8bcc85e7db0c..78bfb8df51aebb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi.ll
@@ -78,9 +78,9 @@ define i32 @foo2(ptr noalias nocapture %B, ptr noalias nocapture %A, i32 %n, i32
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.000000e+01, double 1.000000e+01>
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 4.000000e+00, double 4.000000e+00>
-; CHECK-NEXT:    [[TMP4]] = fadd <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+01)
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], splat (double 4.000000e+00)
+; CHECK-NEXT:    [[TMP4]] = fadd <2 x double> [[TMP3]], splat (double 4.000000e+00)
 ; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_019]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -236,7 +236,7 @@ define float @sort_phi_type(ptr nocapture readonly %A) {
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x float> [ splat (float 1.000000e+01), [[ENTRY]] ], [ [[TMP2:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 3, i32 2>
 ; CHECK-NEXT:    [[TMP2]] = fmul <4 x float> [[TMP1]], <float 8.000000e+00, float 9.000000e+00, float 1.000000e+02, float 1.110000e+02>
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
index 3b42ff9d85604d..038409c56789b4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi3.ll
@@ -15,7 +15,7 @@ define void @Rf_GReset() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr @d, align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double poison, double undef>, double [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP1]]
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq ptr inttoptr (i64 115 to ptr), @Rf_gpptr
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[IF_THEN:%.*]], label [[IF_END7:%.*]]
 ; CHECK:       if.then:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
index c61d1fbc394798..bcf36c623c27b8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2div.ll
@@ -9,7 +9,7 @@ define void @powof2div_uniform(ptr noalias nocapture %a, ptr noalias nocapture r
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <4 x i32> [[TMP4]], splat (i32 2)
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
index d8e646b544179f..5e8119f4207fb5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/powof2mul.ll
@@ -9,7 +9,7 @@ define void @powof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocapture r
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 2)
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -52,7 +52,7 @@ define void @negpowof2mul_uniform(ptr noalias nocapture %a, ptr noalias nocaptur
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[C:%.*]], align 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], <i32 -2, i32 -2, i32 -2, i32 -2>
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], splat (i32 -2)
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -182,20 +182,20 @@ define void @PR51436(ptr nocapture %a) {
 ; SSE-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 4
 ; SSE-NEXT:    [[GEP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 6
 ; SSE-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 8
-; SSE-NEXT:    [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP2]], <i64 -17592186044416, i64 -17592186044416>
+; SSE-NEXT:    [[TMP2:%.*]] = mul <2 x i64> [[TMP1]], splat (i64 -17592186044416)
+; SSE-NEXT:    [[TMP3:%.*]] = add <2 x i64> [[TMP2]], splat (i64 -17592186044416)
 ; SSE-NEXT:    store <2 x i64> [[TMP3]], ptr [[A]], align 8
 ; SSE-NEXT:    [[TMP6:%.*]] = load <2 x i64>, ptr [[GEP2]], align 8
-; SSE-NEXT:    [[TMP7:%.*]] = mul <2 x i64> [[TMP6]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP7]], <i64 -17592186044416, i64 -17592186044416>
+; SSE-NEXT:    [[TMP4:%.*]] = mul <2 x i64> [[TMP6]], splat (i64 -17592186044416)
+; SSE-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP4]], splat (i64 -17592186044416)
 ; SSE-NEXT:    store <2 x i64> [[TMP8]], ptr [[GEP2]], align 8
 ; SSE-NEXT:    [[TMP11:%.*]] = load <2 x i64>, ptr [[GEP4]], align 8
-; SSE-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[TMP11]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[TMP12]], <i64 -17592186044416, i64 -17592186044416>
+; SSE-NEXT:    [[TMP7:%.*]] = mul <2 x i64> [[TMP11]], splat (i64 -17592186044416)
+; SSE-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -17592186044416)
 ; SSE-NEXT:    store <2 x i64> [[TMP13]], ptr [[GEP4]], align 8
 ; SSE-NEXT:    [[TMP16:%.*]] = load <2 x i64>, ptr [[GEP6]], align 8
-; SSE-NEXT:    [[TMP17:%.*]] = mul <2 x i64> [[TMP16]], <i64 -17592186044416, i64 -17592186044416>
-; SSE-NEXT:    [[TMP18:%.*]] = add <2 x i64> [[TMP17]], <i64 -17592186044416, i64 -17592186044416>
+; SSE-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[TMP16]], splat (i64 -17592186044416)
+; SSE-NEXT:    [[TMP18:%.*]] = add <2 x i64> [[TMP10]], splat (i64 -17592186044416)
 ; SSE-NEXT:    store <2 x i64> [[TMP18]], ptr [[GEP6]], align 8
 ; SSE-NEXT:    ret void
 ;
@@ -203,12 +203,12 @@ define void @PR51436(ptr nocapture %a) {
 ; AVX-NEXT:  entry:
 ; AVX-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 4
 ; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, ptr [[A]], align 8
-; AVX-NEXT:    [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
-; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP2]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
+; AVX-NEXT:    [[TMP2:%.*]] = mul <4 x i64> [[TMP1]], splat (i64 -17592186044416)
+; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i64> [[TMP2]], splat (i64 -17592186044416)
 ; AVX-NEXT:    store <4 x i64> [[TMP3]], ptr [[A]], align 8
 ; AVX-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[GEP4]], align 8
-; AVX-NEXT:    [[TMP7:%.*]] = mul <4 x i64> [[TMP6]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
-; AVX-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[TMP7]], <i64 -17592186044416, i64 -17592186044416, i64 -17592186044416, i64 -17592186044416>
+; AVX-NEXT:    [[TMP4:%.*]] = mul <4 x i64> [[TMP6]], splat (i64 -17592186044416)
+; AVX-NEXT:    [[TMP8:%.*]] = add <4 x i64> [[TMP4]], splat (i64 -17592186044416)
 ; AVX-NEXT:    store <4 x i64> [[TMP8]], ptr [[GEP4]], align 8
 ; AVX-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
index 8f96968b978a57..dc4e28fe261784 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr23510.ll
@@ -11,16 +11,16 @@ define void @_Z3fooPml(ptr nocapture %a, i64 %i) {
 ; CHECK-LABEL: @_Z3fooPml(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 4)
 ; CHECK-NEXT:    store <2 x i64> [[TMP2]], ptr [[A]], align 8
 ; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[I:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr @total, align 8
-; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    store i64 [[ADD]], ptr @total, align 8
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i64>, ptr [[A]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr <2 x i64> [[TMP5]], <i64 4, i64 4>
-; CHECK-NEXT:    store <2 x i64> [[TMP6]], ptr [[A]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 4)
+; CHECK-NEXT:    store <2 x i64> [[TMP3]], ptr [[A]], align 8
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
 ; CHECK-NEXT:    [[TMP7:%.*]] = load i64, ptr @total, align 8
 ; CHECK-NEXT:    [[ADD9:%.*]] = add i64 [[TMP7]], [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
index cb24a9cefffa2e..9fbe0a54b0688d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr35497.ll
@@ -63,16 +63,16 @@ define void @pr35497() local_unnamed_addr #0 {
 ; SSE-NEXT:    store i64 [[ADD]], ptr undef, align 1
 ; SSE-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4
 ; SSE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> <i64 poison, i64 undef>, i64 [[TMP0]], i32 0
-; SSE-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], <i64 2, i64 2>
-; SSE-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>
+; SSE-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2)
+; SSE-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20)
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> poison, <2 x i32> <i32 1, i32 0>
 ; SSE-NEXT:    [[TMP5:%.*]] = add nuw nsw <2 x i64> [[TMP4]], zeroinitializer
 ; SSE-NEXT:    store <2 x i64> [[TMP5]], ptr undef, align 1
 ; SSE-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP5]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
 ; SSE-NEXT:    [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1
-; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], <i64 2, i64 2>
-; SSE-NEXT:    [[TMP9:%.*]] = and <2 x i64> [[TMP8]], <i64 20, i64 20>
-; SSE-NEXT:    [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], <i64 6, i64 6>
+; SSE-NEXT:    [[TMP8:%.*]] = shl <2 x i64> [[TMP7]], splat (i64 2)
+; SSE-NEXT:    [[TMP9:%.*]] = and <2 x i64> [[TMP8]], splat (i64 20)
+; SSE-NEXT:    [[TMP10:%.*]] = lshr <2 x i64> [[TMP5]], splat (i64 6)
 ; SSE-NEXT:    [[TMP11:%.*]] = add nuw nsw <2 x i64> [[TMP9]], [[TMP10]]
 ; SSE-NEXT:    store <2 x i64> [[TMP11]], ptr [[ARRAYIDX2_2]], align 1
 ; SSE-NEXT:    ret void
@@ -84,15 +84,15 @@ define void @pr35497() local_unnamed_addr #0 {
 ; AVX-NEXT:    store i64 [[ADD]], ptr undef, align 1
 ; AVX-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], ptr undef, i64 0, i64 4
 ; AVX-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> <i64 undef, i64 poison>, i64 [[TMP0]], i32 1
-; AVX-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], <i64 2, i64 2>
-; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 20, i64 20>
+; AVX-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 2)
+; AVX-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 20)
 ; AVX-NEXT:    [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
 ; AVX-NEXT:    store <2 x i64> [[TMP4]], ptr undef, align 1
 ; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <2 x i64> [[TMP4]], <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
 ; AVX-NEXT:    [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[ADD]], i32 1
-; AVX-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], <i64 2, i64 2>
-; AVX-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP7]], <i64 20, i64 20>
-; AVX-NEXT:    [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], <i64 6, i64 6>
+; AVX-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP6]], splat (i64 2)
+; AVX-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP7]], splat (i64 20)
+; AVX-NEXT:    [[TMP9:%.*]] = lshr <2 x i64> [[TMP4]], splat (i64 6)
 ; AVX-NEXT:    [[TMP10:%.*]] = add nuw nsw <2 x i64> [[TMP8]], [[TMP9]]
 ; AVX-NEXT:    store <2 x i64> [[TMP10]], ptr [[ARRAYIDX2_2]], align 1
 ; AVX-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
index dd51105e1a6ca2..2588012847d096 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr40522.ll
@@ -58,7 +58,7 @@ define void @test2(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[B:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[C:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[D:%.*]], i64 3
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP4]], ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
@@ -83,7 +83,7 @@ define void @test2_vec(i32 %0, i32 %1, i32 %2, i32 %3, ptr nocapture %4) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP1:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP2:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3:%.*]], i64 3
-; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP9]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP10]], ptr [[TMP4:%.*]], align 16, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
index 264a41daef55b8..e8703f2081c66d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067-inseltpoison.ll
@@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
index 6960e544b59a8a..a77f2788200dad 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr44067.ll
@@ -7,7 +7,7 @@ define <2 x float> @foo(ptr %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], <float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], splat (float 2.000000e+00)
 ; CHECK-NEXT:    ret <2 x float> [[TMP2]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
index 29021150ccd2e3..bae127527bbbeb 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr46983.ll
@@ -11,8 +11,8 @@ define void @store_i32(ptr nocapture %0, i32 %1, i32 %2) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = mul <4 x i32> [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i32> [[TMP7]], splat (i32 15)
+; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP8]], <4 x i32> splat (i32 255))
 ; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP0]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
@@ -53,8 +53,8 @@ define void @store_i8(ptr nocapture %0, i32 %1, i32 %2) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[TMP7]], [[TMP5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>)
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], splat (i32 15)
+; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[TMP9]], <4 x i32> splat (i32 255))
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc nuw <4 x i32> [[TMP10]] to <4 x i8>
 ; CHECK-NEXT:    store <4 x i8> [[TMP11]], ptr [[TMP0]], align 1, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    ret void
@@ -104,11 +104,11 @@ define void @store_i64(ptr nocapture %0, i32 %1, i32 %2) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i64> poison, i64 [[TMP4]], i64 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i64> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr <4 x i64> [[TMP8]], splat (i64 15)
 ; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], <i32 255, i32 255, i32 255, i32 255>
-; CHECK-NEXT:    [[TMP12:%.*]] = and <4 x i64> [[TMP9]], <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> <i64 255, i64 255, i64 255, i64 255>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult <4 x i32> [[TMP10]], splat (i32 255)
+; CHECK-NEXT:    [[TMP12:%.*]] = and <4 x i64> [[TMP9]], splat (i64 4294967295)
+; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i64> [[TMP12]], <4 x i64> splat (i64 255)
 ; CHECK-NEXT:    store <4 x i64> [[TMP13]], ptr [[TMP0]], align 8, !tbaa [[TBAA5]]
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll
index 92a4095c7c57a8..4f8661f6bac078 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr48879-sroa.ll
@@ -91,7 +91,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2
 ; AVX-NEXT:    [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], <i64 48, i64 32>
 ; AVX-NEXT:    [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]]
 ; AVX-NEXT:    [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64>
-; AVX-NEXT:    [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], <i64 16, i64 16>
+; AVX-NEXT:    [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16)
 ; AVX-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]]
 ; AVX-NEXT:    [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64>
 ; AVX-NEXT:    [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]]
@@ -123,7 +123,7 @@ define { i64, i64 } @compute_min(ptr nocapture noundef nonnull readonly align 2
 ; AVX2-NEXT:    [[TMP17:%.*]] = shl nuw <2 x i64> [[TMP16]], <i64 48, i64 32>
 ; AVX2-NEXT:    [[TMP18:%.*]] = or <2 x i64> [[TMP15]], [[TMP17]]
 ; AVX2-NEXT:    [[TMP19:%.*]] = zext <2 x i16> [[TMP7]] to <2 x i64>
-; AVX2-NEXT:    [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], <i64 16, i64 16>
+; AVX2-NEXT:    [[TMP20:%.*]] = shl nuw nsw <2 x i64> [[TMP19]], splat (i64 16)
 ; AVX2-NEXT:    [[TMP21:%.*]] = or <2 x i64> [[TMP18]], [[TMP20]]
 ; AVX2-NEXT:    [[TMP22:%.*]] = zext <2 x i16> [[TMP4]] to <2 x i64>
 ; AVX2-NEXT:    [[TMP23:%.*]] = or <2 x i64> [[TMP21]], [[TMP22]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
index b1bc5d14550ef7..26258402b9781d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr49933.ll
@@ -4,7 +4,7 @@
 define void @foo(ptr noalias nocapture %t0, ptr noalias nocapture readonly %t1) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i8>, ptr [[T1:%.*]], align 1, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <8 x i8> [[TMP2]], splat (i8 64)
 ; CHECK-NEXT:    [[TMP4:%.*]] = sub <8 x i8> zeroinitializer, [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP2]], <8 x i8> [[TMP4]]
 ; CHECK-NEXT:    store <8 x i8> [[TMP5]], ptr [[T0:%.*]], align 1, !tbaa [[TBAA0]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
index 6f70aa5f4f6afb..cb02f4d10923c8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-unknown"
 define void @exact(ptr %x) {
 ; CHECK-LABEL: @exact(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -40,7 +40,7 @@ define void @exact(ptr %x) {
 define void @not_exact(ptr %x) {
 ; CHECK-LABEL: @not_exact(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -69,7 +69,7 @@ define void @not_exact(ptr %x) {
 define void @nsw(ptr %x) {
 ; CHECK-LABEL: @nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -98,7 +98,7 @@ define void @nsw(ptr %x) {
 define void @not_nsw(ptr %x) {
 ; CHECK-LABEL: @not_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -127,7 +127,7 @@ define void @not_nsw(ptr %x) {
 define void @nuw(ptr %x) {
 ; CHECK-LABEL: @nuw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -156,7 +156,7 @@ define void @nuw(ptr %x) {
 define void @not_nuw(ptr %x) {
 ; CHECK-LABEL: @not_nuw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -185,7 +185,7 @@ define void @not_nuw(ptr %x) {
 define void @not_nsw_but_nuw(ptr %x) {
 ; CHECK-LABEL: @not_nsw_but_nuw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -214,7 +214,7 @@ define void @not_nsw_but_nuw(ptr %x) {
 define void @nnan(ptr %x) {
 ; CHECK-LABEL: @nnan(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -243,7 +243,7 @@ define void @nnan(ptr %x) {
 define void @not_nnan(ptr %x) {
 ; CHECK-LABEL: @not_nnan(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[TMP2]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -272,7 +272,7 @@ define void @not_nnan(ptr %x) {
 define void @only_fast(ptr %x) {
 ; CHECK-LABEL: @only_fast(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -301,7 +301,7 @@ define void @only_fast(ptr %x) {
 define void @only_arcp(ptr %x) {
 ; CHECK-LABEL: @only_arcp(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]], splat (float 1.000000e+00)
 ; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -330,8 +330,8 @@ define void @only_arcp(ptr %x) {
 define void @addsub_all_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_all_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
@@ -361,8 +361,8 @@ define void @addsub_all_nsw(ptr %x) {
 define void @addsub_some_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_some_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
@@ -392,8 +392,8 @@ define void @addsub_some_nsw(ptr %x) {
 define void @addsub_no_nsw(ptr %x) {
 ; CHECK-LABEL: @addsub_no_nsw(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], splat (i32 1)
+; CHECK-NEXT:    [[TMP4:%.*]] = sub <4 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
 ; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[X]], align 4
 ; CHECK-NEXT:    ret void
@@ -424,7 +424,7 @@ define void @fcmp_fast(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_fast(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> splat (double -0.000000e+00), [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
 ; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
@@ -482,7 +482,7 @@ define void @fcmp_no_fast(ptr %x) #1 {
 ; CHECK-LABEL: @fcmp_no_fast(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[X:%.*]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fsub <2 x double> splat (double -0.000000e+00), [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]]
 ; CHECK-NEXT:    store <2 x double> [[TMP5]], ptr [[X]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
index 4c8d6b734ddc15..789ac9ef23b311 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-bool-logic-op-inside.ll
@@ -24,7 +24,7 @@ define i1 @test1(i32 %x, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A]], i32 1
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[C]], i32 3
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP4]], splat (i32 1)
 ; CHECK-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[D]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
index 6200e3ae43fc98..0771fabef3e028 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll
@@ -274,8 +274,8 @@ define i1 @logical_and_icmp_clamp_extra_use_cmp(<4 x i32> %x) {
 
 define i1 @logical_and_icmp_clamp_extra_use_select(<4 x i32> %x) {
 ; CHECK-LABEL: @logical_and_icmp_clamp_extra_use_select(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], splat (i32 42)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[S1:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
@@ -352,9 +352,9 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) {
 ; CHECK-LABEL: @logical_and_icmp_clamp_partial(
 ; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], <i32 42, i32 42>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <2 x i32> [[TMP2]], splat (i32 42)
 ; CHECK-NEXT:    [[C2:%.*]] = icmp slt i32 [[TMP1]], 42
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], splat (i32 17)
 ; CHECK-NEXT:    [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]])
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
@@ -481,7 +481,7 @@ define i1 @logical_or_icmp_extra_op(<4 x i32> %x, <4 x i32> %y, i1 %c) {
 
 define i1 @logical_and_icmp_extra_args(<4 x i32> %x, i1 %c0, i1 %c1, i1 %c2) {
 ; CHECK-LABEL: @logical_and_icmp_extra_args(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], <i32 17, i32 17, i32 17, i32 17>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], splat (i32 17)
 ; CHECK-NEXT:    [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP2]])
 ; CHECK-NEXT:    [[OP_RDX:%.*]] = select i1 [[TMP3]], i1 [[C0:%.*]], i1 false
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index ce9158d8bf2eea..35595b79cd708b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -22,7 +22,7 @@ define i32 @reduce(ptr nocapture %A, i32 %n, i32 %m) {
 ; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[I_015]], 1
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 [[MUL]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], <double 7.000000e+00, double 7.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x double> [[TMP0]], splat (double 7.000000e+00)
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
 ; CHECK-NEXT:    [[ADD5:%.*]] = fadd double [[TMP2]], [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
index 5dcd5d3190ad0c..29a8a229980e92 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction2.ll
@@ -52,7 +52,7 @@ define double @foo(ptr nocapture %D) {
 
 define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
 ; CHECK-LABEL: @two_wide_fcmp_reduction(
-; CHECK-NEXT:    [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fcmp ogt <2 x double> [[A0:%.*]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[B:%.*]] = extractelement <2 x i1> [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x i1> [[A]], i32 1
 ; CHECK-NEXT:    [[D:%.*]] = and i1 [[B]], [[C]]
@@ -67,7 +67,7 @@ define i1 @two_wide_fcmp_reduction(<2 x double> %a0) {
 
 define double @fadd_reduction(<2 x double> %a0) {
 ; CHECK-LABEL: @fadd_reduction(
-; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[A:%.*]] = fadd fast <2 x double> [[A0:%.*]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[B:%.*]] = extractelement <2 x double> [[A]], i32 0
 ; CHECK-NEXT:    [[C:%.*]] = extractelement <2 x double> [[A]], i32 1
 ; CHECK-NEXT:    [[D:%.*]] = fadd fast double [[B]], [[C]]
@@ -102,7 +102,7 @@ define i1 @fcmp_lt_gt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP4]]
 ; CHECK-NEXT:    br i1 [[OR_COND]], label [[CLEANUP:%.*]], label [[LOR_LHS_FALSE:%.*]]
 ; CHECK:       lor.lhs.false:
-; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP10:%.*]] = fcmp ule <2 x double> [[TMP7]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
 ; CHECK-NEXT:    [[NOT_OR_COND9:%.*]] = or i1 [[TMP11]], [[TMP12]]
@@ -144,7 +144,7 @@ define i1 @fcmp_lt(double %a, double %b, double %c) {
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[MUL]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = fdiv <2 x double> [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], <double 0x3EB0C6F7A0B5ED8D, double 0x3EB0C6F7A0B5ED8D>
+; CHECK-NEXT:    [[TMP9:%.*]] = fcmp uge <2 x double> [[TMP8]], splat (double 0x3EB0C6F7A0B5ED8D)
 ; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP9]], i32 1
 ; CHECK-NEXT:    [[NOT_OR_COND:%.*]] = or i1 [[TMP10]], [[TMP11]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
index 042fb88297e2ba..cb955ff91ed810 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/resched.ll
@@ -29,7 +29,7 @@ define fastcc void @_ZN12_GLOBAL__N_127PolynomialMultiplyRecognize9recognizeEv()
 ; CHECK-NEXT:    [[TMP14:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v4i8(<16 x i8> [[TMP12]], <4 x i8> [[TMP13]], i64 4)
 ; CHECK-NEXT:    [[TMP15:%.*]] = trunc <2 x i32> [[TMP2]] to <2 x i8>
 ; CHECK-NEXT:    [[TMP16:%.*]] = call <16 x i8> @llvm.vector.insert.v16i8.v2i8(<16 x i8> [[TMP14]], <2 x i8> [[TMP15]], i64 2)
-; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP16]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP17:%.*]] = and <16 x i8> [[TMP16]], splat (i8 1)
 ; CHECK-NEXT:    store <16 x i8> [[TMP17]], ptr undef, align 1
 ; CHECK-NEXT:    ret void
 ; CHECK:       if.end50.i:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
index 6bbc33bac80f9d..79ce74bd21dbcf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reuse-extracts-in-wider-vect.ll
@@ -14,7 +14,7 @@ define i32 @foo(i32 %0, ptr %1, ptr %2)  {
 ; CHECK-NEXT:    br label [[T37:%.*]]
 ; CHECK:       t37:
 ; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x float> [ [[TMP5]], [[TMP3:%.*]] ], [ [[T89:%.*]], [[T37]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, [[TMP6]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[TMP6]]
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[T21:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[T4]], i64 0, i32 2, i64 0
 ; CHECK-NEXT:    store <4 x float> [[SHUFFLE]], ptr [[T21]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll
index 14033f28b93163..d1b177e2b6dea5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-scalars-in-buildvector.ll
@@ -5,7 +5,7 @@ define void @test(ptr %p, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[B:%.*]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP2]], splat (i32 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[E:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[F:%.*]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
index f10e0a84081669..26e62d36fb6a82 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
@@ -11,12 +11,12 @@ define i32 @slp_schedule_bundle() local_unnamed_addr #0 {
 ; CHECK-LABEL: @slp_schedule_bundle(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr @b, align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]], splat (i32 31)
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 1)
 ; CHECK-NEXT:    store <4 x i32> [[TMP2]], ptr @a, align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr getelementptr ([1 x i32], ptr @b, i64 4, i64 0), align 4
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], <i32 31, i32 31>
-; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], splat (i32 31)
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], splat (i32 1)
 ; CHECK-NEXT:    store <2 x i32> [[TMP5]], ptr getelementptr ([1 x i32], ptr @a, i64 4, i64 0), align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
index a009b1eaf65f13..242d66fda569a6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shrink_after_reorder.ll
@@ -8,7 +8,7 @@ define void @wombat(ptr %ptr, ptr %ptr1) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i32> [[TMP2]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <4 x i32> [[TMP1]], undef
 ; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> undef, <4 x i32> [[TMP4]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
index 89c64d64b9c844..1d101318bdf876 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -14,7 +14,7 @@ define i32 @rollable(ptr noalias nocapture %in, ptr noalias nocapture %out, i64
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], <i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], splat (i32 7)
 ; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[TMP6]], <i32 7, i32 14, i32 21, i32 28>
 ; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    [[TMP8]] = add i64 [[I_019]], 1
@@ -76,11 +76,11 @@ define i32 @unrollable(ptr %in, ptr %out, i64 %n) nounwind ssp uwtable {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], <i32 7, i32 7>
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i32> [[TMP7]], splat (i32 7)
 ; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP8]], <i32 7, i32 14>
 ; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x i32>, ptr [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], <i32 7, i32 7>
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i32> [[TMP11]], splat (i32 7)
 ; CHECK-NEXT:    [[TMP13:%.*]] = add <2 x i32> [[TMP12]], <i32 21, i32 28>
 ; CHECK-NEXT:    store <2 x i32> [[TMP9]], ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[BARRIER:%.*]] = call i32 @goo(i32 0)
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll
index a0e17446b6abb1..917baaa86460c4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sitofp-minbitwidth-node.ll
@@ -7,7 +7,7 @@ define void @foo(ptr %ptr) {
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 328
 ; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 334
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr [[GEP0]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], <i16 -1, i16 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i16> [[TMP1]], splat (i16 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = uitofp <2 x i16> [[TMP3]] to <2 x double>
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i16>, ptr [[GEP3]], align 2
 ; CHECK-NEXT:    [[TMP6:%.*]] = zext <2 x i16> [[TMP5]] to <2 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll
index f4a0fc84cee8e6..88a0ba59fa633c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stackrestore-dependence.ll
@@ -6,7 +6,7 @@ define void @stackrestore1(ptr %out) {
 ; CHECK-LABEL: @stackrestore1(
 ; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
 ; CHECK-NEXT:    [[LOCAL_ALLOCA:%.*]] = alloca [16 x i8], align 4
-; CHECK-NEXT:    store <4 x float> <float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000>, ptr [[LOCAL_ALLOCA]], align 4
+; CHECK-NEXT:    store <4 x float> splat (float 0x3FF3333340000000), ptr [[LOCAL_ALLOCA]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[LOCAL_ALLOCA]], align 4
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
index e93c5244dfbe2c..977942aa06c51a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll
@@ -8,7 +8,7 @@ define void @basecase(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @basecase(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    store ptr null, ptr [[A]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    store <2 x ptr> [[TMP2]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -34,7 +34,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    [[V2:%.*]] = alloca i8, align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
 ; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
@@ -126,7 +126,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    [[V2:%.*]] = alloca inalloca i8, align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1
 ; CHECK-NEXT:    store ptr [[TMP4]], ptr [[A:%.*]], align 8
 ; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]]
@@ -161,7 +161,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    call void @llvm.stackrestore.p0(ptr [[STACK]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -188,7 +188,7 @@ define void @stacksave3(ptr %a, ptr %b, ptr %c) {
 define void @stacksave4(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave4(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
 ; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
 ; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
@@ -218,7 +218,7 @@ define void @stacksave4(ptr %a, ptr %b, ptr %c) {
 define void @stacksave5(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @stacksave5(
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x ptr>, ptr [[A:%.*]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, <2 x ptr> [[TMP1]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    [[STACK:%.*]] = call ptr @llvm.stacksave.p0()
 ; CHECK-NEXT:    [[X:%.*]] = alloca inalloca i8, align 1
 ; CHECK-NEXT:    call void @use(ptr inalloca(i8) [[X]]) #[[ATTR4]]
@@ -258,7 +258,7 @@ define void @stackrestore1(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    store i8 0, ptr [[V2]], align 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> splat (i32 1)
 ; CHECK-NEXT:    store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
index 378403ac304683..02f8c552fec887 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_vectorize.ll
@@ -26,7 +26,7 @@ define void @_Z8DistanceIlLi5EEvPfiPmS0_(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5)
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
 ; CHECK-NEXT:    [[ADD_PTR11:%.*]] = getelementptr inbounds float, ptr [[ADD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i64> [[TMP4]], i32 0
@@ -145,7 +145,7 @@ define void @store15(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
 ; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5)
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -223,7 +223,7 @@ define void @store16(ptr %p1, i32 %p2, ptr %p3, ptr %p4) {
 ; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
 ; CHECK-NEXT:    store i64 5, ptr [[ARRAYIDX9]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i64>, ptr [[P3]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], <i64 5, i64 5, i64 5, i64 5>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <4 x i64> [[TMP3]], splat (i64 5)
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[P3]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll
index 2f126df81f2305..83e1bef8fa066a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/subvector-minbitwidth-unsigned-value.ll
@@ -26,7 +26,7 @@ define i1 @test(i64 %v1, ptr %v2, i32 %v3, i1 %v4) {
 ; CHECK-NEXT:    [[CONV_I_I1743_3:%.*]] = fptoui float [[TMP17]] to i32
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[CONV_I_I1743_3]], 0
 ; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <4 x float> [[TMP16]] to <4 x i32>
-; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], <i32 1333788672, i32 1333788672, i32 1333788672, i32 1333788672>
+; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult <4 x i32> [[TMP19]], splat (i32 1333788672)
 ; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP20]], i32 3
 ; CHECK-NEXT:    [[NARROW:%.*]] = select i1 [[TMP21]], i1 [[TMP18]], i1 false
 ; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x float> [[TMP16]], i32 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
index 69ae26b9f25856..912b148c310068 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/supernode.ll
@@ -218,7 +218,7 @@ define void @supernode_scheduling_cross_block(ptr %Aarray, ptr %Barray, ptr %Sar
 ; ENABLED-NEXT:    [[B1:%.*]] = load double, ptr [[IDXB1]], align 8
 ; ENABLED-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
 ; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
-; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
+; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], splat (double 2.000000e+00)
 ; ENABLED-NEXT:    br label [[BB:%.*]]
 ; ENABLED:       bb:
 ; ENABLED-NEXT:    [[A1:%.*]] = load double, ptr [[IDXA1]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
index 71b957ac7bbd89..dbd4adea4a4ece 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/unreachable.ll
@@ -15,7 +15,7 @@ define void @foo(ptr nocapture %x) #0 {
 ; CHECK-NEXT:    [[BAD:%.*]] = fadd float [[BAD]], 0.000000e+00
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ <i32 2, i32 2, i32 2, i32 2>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ poison, [[BB1:%.*]] ], [ splat (i32 2), [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr [[X:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
index 96d4b84e036918..6e2a43ac5f9f10 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-base.ll
@@ -166,7 +166,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; NON-POW2-NEXT:  entry:
 ; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
 ; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
-; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>
+; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
 ; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; NON-POW2-NEXT:    ret void
 ;
@@ -177,7 +177,7 @@ define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
 ; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
 ; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
 ; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
-; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], <float 1.000000e+01, float 1.000000e+01>
+; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
 ; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
 ; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
 ; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
index 373feb4a658f0b..0ed12760b563fa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias-inseltpoison.ll
@@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
-; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3)
 ; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
index da935173b20d11..f47373747e578a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias.ll
@@ -52,7 +52,7 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
-; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3)
 ; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
index e52b29a7f681c7..d650a972ad8ca2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vec_list_bias_external_insert_shuffled.ll
@@ -47,7 +47,7 @@ define void @test(ptr nocapture %t2) {
 ; CHECK-NEXT:    [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
 ; CHECK-NEXT:    [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
-; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT:    [[T76:%.*]] = shl <8 x i32> [[T72]], splat (i32 3)
 ; CHECK-NEXT:    store <8 x i32> [[T76]], ptr [[T2]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
index e30cb76d53d928..869a9d1aee80e3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -225,8 +225,8 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 2
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], splat (i32 -1)
+; CHECK-NEXT:    [[TMP2:%.*]] = sub nsw <2 x i32> [[TMP0]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 3
@@ -599,8 +599,8 @@ define void @addsub1f(ptr noalias %dst, ptr noalias %src) {
 ; CHECK-NEXT:    [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 2
 ; CHECK-NEXT:    [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[SRC]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
-; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], <float -1.000000e+00, float -1.000000e+00>
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <2 x float> [[TMP0]], splat (float -1.000000e+00)
+; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <2 x float> [[TMP0]], splat (float -1.000000e+00)
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr [[DST]], align 4
 ; CHECK-NEXT:    [[INCDEC_PTR4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 3
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
index f29d4883049194..79bef36515a88a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-pair-path.ll
@@ -24,7 +24,7 @@ define double @root_selection(double %a, double %b, double %c, double %d) local_
 ; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <2 x double> [[TMP6]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = fsub fast <2 x double> [[TMP7]], <double 1.100000e+01, double undef>
 ; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <2 x double> [[TMP8]], <double 1.200000e+01, double 4.000000e+00>
-; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], <double 1.400000e+00, double 1.400000e+00>
+; CHECK-NEXT:    [[TMP10:%.*]] = fdiv fast <2 x double> [[TMP9]], splat (double 1.400000e+00)
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x double> [[TMP10]], i32 1
 ; CHECK-NEXT:    [[I07:%.*]] = fadd fast double undef, [[TMP11]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x double> [[TMP10]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
index 287b623f63690a..ad4daeab003f5f 100644
--- a/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll
@@ -153,7 +153,7 @@ define <2 x i8> @replace_through_binop_fail_cant_speculate(i8 %inp, <2 x i8> %d,
 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[INP]], 5
 ; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i8> poison, i8 [[INP]], i64 0
 ; CHECK-NEXT:    [[V:%.*]] = insertelement <2 x i8> [[V0]], i8 [[ADD]], i64 1
-; CHECK-NEXT:    [[DIV0:%.*]] = sdiv <2 x i8> <i8 -128, i8 -128>, [[V]]
+; CHECK-NEXT:    [[DIV0:%.*]] = sdiv <2 x i8> splat (i8 -128), [[V]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor i8 [[INP]], 123
 ; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i8> [[DIV0]], i8 [[TMP1]], i64 0
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
diff --git a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll
index f0d5629cfc22e9..3cf359fd47351a 100644
--- a/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/call-arg-reduced-by-minbitwidth.ll
@@ -12,8 +12,8 @@ define void @test(ptr %0, i8 %1, i1 %cmp12.i) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i8> [[TMP4]], <8 x i8> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[PRE:%.*]]
 ; CHECK:       pre:
-; CHECK-NEXT:    [[TMP8:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
-; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i8> [[TMP8]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP5]], <8 x i8> splat (i8 1))
+; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i8> [[TMP6]], splat (i8 1)
 ; CHECK-NEXT:    [[TMP10:%.*]] = select <8 x i1> [[TMP3]], <8 x i8> [[TMP9]], <8 x i8> [[TMP5]]
 ; CHECK-NEXT:    store <8 x i8> [[TMP10]], ptr [[TMP0]], align 1
 ; CHECK-NEXT:    br label [[PRE]]
diff --git a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll
index 94aa8de5fdb72c..118507c790bfc8 100644
--- a/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll
+++ b/llvm/test/Transforms/SLPVectorizer/extended-vectorized-gathered-inst.ll
@@ -14,7 +14,7 @@ define void @test(ptr %top) {
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> <i8 0, i8 0, i8 0, i8 poison>, i8 [[TMP4]], i32 3
 ; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i8> [[TMP1]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = or <4 x i8> [[TMP6]], zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], <i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr <4 x i8> [[TMP7]], splat (i8 2)
 ; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
 ; CHECK:       for.cond.i:
 ; CHECK-NEXT:    store <4 x i8> [[TMP8]], ptr null, align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
index 5f02b0009367c7..74e52da6880400 100644
--- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector-inseltpoison.ll
@@ -469,7 +469,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    ret <4 x double> [[TMP6]]
 ;
   %t0 = fadd double %w , 0.000000e+00
diff --git a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
index 63d55f7b1deb58..8e3a941932c976 100644
--- a/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/insert-element-build-vector.ll
@@ -503,7 +503,7 @@ define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    ret <4 x double> [[TMP6]]
 ;
   %t0 = fadd double %w , 0.000000e+00
diff --git a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll
index f53e22d89d5169..50cc97a529f5fb 100644
--- a/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll
+++ b/llvm/test/Transforms/SLPVectorizer/jumbled_store_crash.ll
@@ -23,7 +23,7 @@ define dso_local void @j() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <2 x i32> [[TMP5]], [[TMP3]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <2 x i32> [[TMP6]] to <2 x float>
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], <float 1.000000e+01, float 1.000000e+01>
+; CHECK-NEXT:    [[TMP8:%.*]] = fmul <2 x float> [[TMP7]], splat (float 1.000000e+01)
 ; CHECK-NEXT:    [[TMP9:%.*]] = fsub <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float> [[TMP10]], i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
index 408dda2211b8d1..62417268bf3d0a 100644
--- a/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
+++ b/llvm/test/Transforms/SLPVectorizer/operand-is-reduced-val.ll
@@ -10,8 +10,8 @@ define i64 @src(i32 %a) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP3:%.*]] = sext <4 x i32> [[TMP2]] to <4 x i64>
-; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], <i64 4294967297, i64 4294967297, i64 4294967297, i64 4294967297>
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = add nsw <4 x i64> [[TMP3]], splat (i64 4294967297)
+; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i64> poison, i64 [[TMP16]], i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll
index 2037e0d67d2f89..4478eab7b827ab 100644
--- a/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll
+++ b/llvm/test/Transforms/SLPVectorizer/phi-node-bitwidt-op-not.ll
@@ -10,7 +10,7 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[TMP2]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i1> [[TMP4]] to <4 x i16>
 ; CHECK-NEXT:    br label [[BB3:%.*]]
@@ -22,8 +22,8 @@ define i32 @test(ptr %b, ptr %c, i32 %0, ptr %a, i1 %tobool3.not) {
 ; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i1> poison, i1 [[TOBOOL3_NOT]], i32 0
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP13:%.*]] = shl <4 x i32> [[TMP12]], splat (i32 16)
+; CHECK-NEXT:    [[TMP14:%.*]] = ashr <4 x i32> [[TMP13]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP15:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
 ; CHECK-NEXT:    br i1 true, label [[BB3]], label [[BB2]]
 ; CHECK:       bb3:
diff --git a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll
index 4074b8654362e0..3ef0de177b478b 100644
--- a/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reduction-whole-regs-loads.ll
@@ -13,7 +13,7 @@ define i64 @test(ptr %p) {
 ; RISCV-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP0]], <4 x i64> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0>
 ; RISCV-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v4i64(<8 x i64> [[TMP2]], <4 x i64> [[TMP0]], i64 0)
 ; RISCV-NEXT:    [[TMP4:%.*]] = call <8 x i64> @llvm.vector.insert.v8i64.v2i64(<8 x i64> [[TMP3]], <2 x i64> [[TMP1]], i64 4)
-; RISCV-NEXT:    [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; RISCV-NEXT:    [[TMP5:%.*]] = mul <8 x i64> [[TMP4]], splat (i64 42)
 ; RISCV-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP5]])
 ; RISCV-NEXT:    ret i64 [[TMP6]]
 ;
@@ -21,7 +21,7 @@ define i64 @test(ptr %p) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <6 x i64>, ptr [[P:%.*]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <6 x i64> [[TMP0]], <6 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 0, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], <i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42, i64 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i64> [[TMP1]], splat (i64 42)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP2]])
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll
index 1b37c0a77e94e0..c1efc85da554d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reduction_loads.ll
@@ -27,7 +27,7 @@ define i32 @test(ptr nocapture readonly %p) {
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_RDX:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+; CHECK-NEXT:    [[TMP2:%.*]] = mul <8 x i32> [[TMP1]], splat (i32 42)
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP2]])
 ; CHECK-NEXT:    [[OP_RDX]] = add i32 [[TMP3]], [[SUM]]
 ; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll
index 3e00550a885215..b37c4c97a5d137 100644
--- a/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/reused-buildvector-matching-vectorized-node.ll
@@ -22,12 +22,12 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> <double 0.000000e+00, double poison>, <2 x i32> <i32 2, i32 0>
 ; CHECK-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer
-; CHECK-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = fcmp olt <2 x double> [[TMP18]], zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> zeroinitializer
+; CHECK-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> splat (double 1.000000e+00), <2 x double> zeroinitializer
 ; CHECK-NEXT:    store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll
index 965bfc7074c638..4e31095a1d49a5 100644
--- a/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll
+++ b/llvm/test/Transforms/SLPVectorizer/revec-fix-109835.ll
@@ -18,10 +18,10 @@ define void @e() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 -4, i32 5, i32 14, i32 23, i32 32, i32 41, i32 50, i32 59, i32 68, i32 77, i32 86, i32 95, i32 104, i32 113, i32 122, i32 131>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 -2, i32 7, i32 16, i32 25, i32 34, i32 43, i32 52, i32 61, i32 70, i32 79, i32 88, i32 97, i32 106, i32 115, i32 124, i32 133>
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <16 x i32> [[DOTSPLAT]], <i32 0, i32 9, i32 18, i32 27, i32 36, i32 45, i32 54, i32 63, i32 72, i32 81, i32 90, i32 99, i32 108, i32 117, i32 126, i32 135>
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult <16 x i32> [[TMP1]], splat (i32 2)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i32> [[TMP2]], splat (i32 2)
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult <16 x i32> [[TMP3]], splat (i32 2)
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult <16 x i32> [[INDUCTION]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq <16 x i32> [[DOTSPLAT]], <i32 -1, i32 -10, i32 -19, i32 -28, i32 -37, i32 -46, i32 -55, i32 -64, i32 -73, i32 -82, i32 -91, i32 -100, i32 -109, i32 -118, i32 -127, i32 -136>
 ; CHECK-NEXT:    [[TMP9:%.*]] = or <16 x i1> [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i1> [[TMP9]], [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll
index 2f0bd4a8f1315c..179c5b2f7f91cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll
+++ b/llvm/test/Transforms/SLPVectorizer/shrink_after_reorder2.ll
@@ -9,14 +9,14 @@ define void @foo(ptr %this, ptr %p, i32 %add7) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 undef>, i32 [[ADD7:%.*]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], <i32 2, i32 2>
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i32> [[TMP0]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    switch i32 undef, label [[SW_EPILOG:%.*]] [
 ; CHECK-NEXT:      i32 0, label [[SW_BB:%.*]]
 ; CHECK-NEXT:      i32 2, label [[SW_BB]]
 ; CHECK-NEXT:    ]
 ; CHECK:       sw.bb:
-; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i32> [[TMP1]], splat (i32 -1)
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[THIS:%.*]], align 4
 ; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP4]], [[TMP3]]
 ; CHECK-NEXT:    br label [[SW_EPILOG]]
diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll
index 6a1a23728d3c70..5326b9802ec6de 100644
--- a/llvm/test/Transforms/SROA/tbaa-struct3.ll
+++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll
@@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) {
 ; CHECK-NEXT:    [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4
-; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4
+; CHECK-NEXT:    store <2 x i8> bitcast (<1 x i16> splat (i16 123) to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4
 ; CHECK-NEXT:    [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4
 ; CHECK-NEXT:    store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true)
diff --git a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
index fd50b1bcec295b..3b6f5ccf3d3ea0 100644
--- a/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
+++ b/llvm/test/Transforms/Scalarizer/phi-unreachable-pred.ll
@@ -47,7 +47,7 @@ define void @f2() {
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
-; CHECK-NEXT:    [[E_SROA_3_2:%.*]] = phi <2 x i64> [ <i64 1, i64 1>, [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ]
+; CHECK-NEXT:    [[E_SROA_3_2:%.*]] = phi <2 x i64> [ splat (i64 1), [[FOR_END]] ], [ [[E_SROA_3_2]], [[FOR_BODY2:%.*]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 6, [[FOR_END]] ], [ [[TMP0]], [[FOR_BODY2]] ]
 ; CHECK-NEXT:    br i1 undef, label [[FOR_BODY2]], label [[FOR_COND1_FOR_END7_CRIT_EDGE:%.*]]
 ; CHECK:       for.cond1.for.end7_crit_edge:
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
index bdfac4f2b30cfd..c86fa349200c54 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-select.ll
@@ -82,7 +82,7 @@ define i32 @basic_veccond(i32 %N, <2 x i1> %cond, <2 x i32> %select_input) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> <i32 42, i32 42>
+; CHECK-NEXT:    [[COND1:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[SELECT_INPUT]], <2 x i32> splat (i32 42)
 ; CHECK-NEXT:    [[VREDUCE:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[COND1]])
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[VREDUCE]], [[RES]]
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I]], 1
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 03db1bba7d22a0..405afd5969a413 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -125,7 +125,7 @@ define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
@@ -234,7 +234,7 @@ define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
 ; CHECK-LABEL: @debug_metadata_diassign(
 ; CHECK-NEXT:  bb0:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 7>, ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
 ; CHECK-NEXT:    ret i16 [[SPEC_SELECT]]
 ;
@@ -311,12 +311,12 @@ define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5,
 ; CHECK-LABEL: @threshold_6(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 2>, ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 3>, ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 4>, ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 5>, ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 6>, ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll
index 00cee73da428c9..c80e22499775aa 100644
--- a/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll
+++ b/llvm/test/Transforms/SimplifyCFG/preserve-store-alignment.ll
@@ -10,16 +10,16 @@ define i32 @align_both_equal() local_unnamed_addr {
 ; CHECK-LABEL: @align_both_equal(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @m, align 8
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TOBOOL]], true
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true
@@ -64,16 +64,16 @@ define i32 @align_not_equal() local_unnamed_addr {
 ; CHECK-LABEL: @align_not_equal(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @m, align 8
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TOBOOL]], true
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true
@@ -118,16 +118,16 @@ define i32 @align_single_zero() local_unnamed_addr {
 ; CHECK-LABEL: @align_single_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @m, align 8
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TOBOOL]], true
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true
@@ -172,16 +172,16 @@ define i32 @align_single_zero_second_greater_default() local_unnamed_addr {
 ; CHECK-LABEL: @align_single_zero_second_greater_default(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @m, align 8
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TOBOOL]], true
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true
@@ -226,16 +226,16 @@ define i32 @align_both_zero() local_unnamed_addr {
 ; CHECK-LABEL: @align_both_zero(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr getelementptr inbounds ([[STRUCT_COUNTERS:%.*]], ptr @counters, i64 0, i32 1), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 1)
 ; CHECK-NEXT:    store <2 x i64> [[TMP1]], ptr getelementptr inbounds ([[STRUCT_COUNTERS]], ptr @counters, i64 0, i32 1), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr @m, align 8
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[TMP2]], 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], <i64 2, i64 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = add nsw <2 x i64> [[TMP0]], splat (i64 2)
 ; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP3]]
 ; CHECK-NEXT:    [[AND4:%.*]] = and i64 [[TMP2]], 2
 ; CHECK-NEXT:    [[TOBOOL5:%.*]] = icmp eq i64 [[AND4]], 0
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <2 x i64> [[TMP4]], splat (i64 1)
 ; CHECK-NEXT:    [[SIMPLIFYCFG_MERGE:%.*]] = select i1 [[TOBOOL5]], <2 x i64> [[TMP4]], <2 x i64> [[TMP5]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TOBOOL]], true
 ; CHECK-NEXT:    [[TMP7:%.*]] = xor i1 [[TOBOOL5]], true
diff --git a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
index 591e74c1341350..d85331f77b12f8 100644
--- a/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
+++ b/llvm/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
@@ -105,13 +105,13 @@ define void @stride_is_minus_2s(i32 %b, i32 %s) {
 
 define void @stride_is_minus_2s_vec(<2 x i32> %b, <2 x i32> %s) {
 ; CHECK-LABEL: @stride_is_minus_2s_vec(
-; CHECK-NEXT:    [[S6:%.*]] = mul <2 x i32> [[S:%.*]], <i32 6, i32 6>
+; CHECK-NEXT:    [[S6:%.*]] = mul <2 x i32> [[S:%.*]], splat (i32 6)
 ; CHECK-NEXT:    [[T1:%.*]] = add <2 x i32> [[B:%.*]], [[S6]]
 ; CHECK-NEXT:    call void @voo(<2 x i32> [[T1]])
-; CHECK-NEXT:    [[S4:%.*]] = shl <2 x i32> [[S]], <i32 2, i32 2>
+; CHECK-NEXT:    [[S4:%.*]] = shl <2 x i32> [[S]], splat (i32 2)
 ; CHECK-NEXT:    [[T2:%.*]] = add <2 x i32> [[B]], [[S4]]
 ; CHECK-NEXT:    call void @voo(<2 x i32> [[T2]])
-; CHECK-NEXT:    [[S2:%.*]] = shl <2 x i32> [[S]], <i32 1, i32 1>
+; CHECK-NEXT:    [[S2:%.*]] = shl <2 x i32> [[S]], splat (i32 1)
 ; CHECK-NEXT:    [[T3:%.*]] = add <2 x i32> [[B]], [[S2]]
 ; CHECK-NEXT:    call void @voo(<2 x i32> [[T3]])
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
index 775ad4c5ecc363..ebd2c5bd2574b5 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/select-shuffle.ll
@@ -562,7 +562,7 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]]
-; CHECK-NEXT:    [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]]
 ; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP61:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> [[TMP59]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -589,9 +589,9 @@ define dso_local i32 @full(ptr nocapture noundef readonly %p1, i32 noundef %st1,
 ; CHECK-NEXT:    [[TMP82:%.*]] = add nsw <16 x i32> [[TMP79]], [[TMP81]]
 ; CHECK-NEXT:    [[TMP83:%.*]] = sub nsw <16 x i32> [[TMP78]], [[TMP80]]
 ; CHECK-NEXT:    [[TMP84:%.*]] = shufflevector <16 x i32> [[TMP82]], <16 x i32> [[TMP83]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP86:%.*]] = and <16 x i32> [[TMP85]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP85:%.*]] = lshr <16 x i32> [[TMP84]], splat (i32 15)
+; CHECK-NEXT:    [[TMP86:%.*]] = and <16 x i32> [[TMP85]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP87:%.*]] = mul nuw <16 x i32> [[TMP86]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP88:%.*]] = add <16 x i32> [[TMP87]], [[TMP84]]
 ; CHECK-NEXT:    [[TMP89:%.*]] = xor <16 x i32> [[TMP88]], [[TMP87]]
 ; CHECK-NEXT:    [[TMP90:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP89]])
@@ -768,7 +768,7 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi
 ; CHECK-NEXT:    [[TMP39:%.*]] = shufflevector <16 x i8> [[TMP37]], <16 x i8> [[TMP38]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
 ; CHECK-NEXT:    [[TMP40:%.*]] = zext <16 x i8> [[TMP39]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP41:%.*]] = sub nsw <16 x i32> [[TMP33]], [[TMP40]]
-; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[TMP42:%.*]] = shl nsw <16 x i32> [[TMP41]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP43:%.*]] = add nsw <16 x i32> [[TMP42]], [[TMP26]]
 ; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
 ; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i32> [[TMP43]], <16 x i32> [[TMP43]], <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -795,9 +795,9 @@ define i32 @full_reorder(ptr nocapture noundef readonly %pix1, i32 noundef %i_pi
 ; CHECK-NEXT:    [[TMP66:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP65]]
 ; CHECK-NEXT:    [[TMP67:%.*]] = sub nsw <16 x i32> [[TMP62]], [[TMP64]]
 ; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP66]], <16 x i32> [[TMP67]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP69:%.*]] = lshr <16 x i32> [[TMP68]], splat (i32 15)
+; CHECK-NEXT:    [[TMP70:%.*]] = and <16 x i32> [[TMP69]], splat (i32 65537)
+; CHECK-NEXT:    [[TMP71:%.*]] = mul nuw <16 x i32> [[TMP70]], splat (i32 65535)
 ; CHECK-NEXT:    [[TMP72:%.*]] = add <16 x i32> [[TMP71]], [[TMP68]]
 ; CHECK-NEXT:    [[TMP73:%.*]] = xor <16 x i32> [[TMP72]], [[TMP71]]
 ; CHECK-NEXT:    [[TMP74:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP73]])
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
index 4216b0e643bb68..3c672efbb5a07a 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shrink-types.ll
@@ -25,7 +25,7 @@ define i32 @test_mask_or(<16 x i32> %a, ptr %b) {
 ; CHECK-LABEL: @test_mask_or(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT:    [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:    [[A_MASKED:%.*]] = and <16 x i32> [[A:%.*]], splat (i32 16)
 ; CHECK-NEXT:    [[TMP0:%.*]] = trunc <16 x i32> [[A_MASKED]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i8> [[WIDE_LOAD]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[TMP1]] to <16 x i32>
@@ -44,14 +44,14 @@ entry:
 define i32 @multiuse(<16 x i32> %u, <16 x i32> %v, ptr %b) {
 ; CHECK-LABEL: @multiuse(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
-; CHECK-NEXT:    [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], <i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[U_MASKED:%.*]] = and <16 x i32> [[U:%.*]], splat (i32 255)
+; CHECK-NEXT:    [[V_MASKED:%.*]] = and <16 x i32> [[V:%.*]], splat (i32 255)
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[B:%.*]], align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr <16 x i8> [[WIDE_LOAD]], splat (i8 4)
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc <16 x i32> [[V_MASKED]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP2:%.*]] = or <16 x i8> [[TMP0]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[TMP2]] to <16 x i32>
-; CHECK-NEXT:    [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+; CHECK-NEXT:    [[TMP4:%.*]] = and <16 x i8> [[WIDE_LOAD]], splat (i8 15)
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc <16 x i32> [[U_MASKED]] to <16 x i8>
 ; CHECK-NEXT:    [[TMP6:%.*]] = or <16 x i8> [[TMP4]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i32>
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 66fe11369d88be..43e5a7e152822e 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -292,7 +292,7 @@ define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) {
 
 define <8 x i8> @constantsplat(<8 x i8> %a) {
 ; CHECK-LABEL: @constantsplat(
-; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[A:%.*]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; CHECK-NEXT:    [[R:%.*]] = add <8 x i8> [[A:%.*]], splat (i8 10)
 ; CHECK-NEXT:    ret <8 x i8> [[R]]
 ;
   %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -339,7 +339,7 @@ define <8 x i8> @constantdiff2(<8 x i8> %a) {
 
 define <8 x half> @constantsplatf(<8 x half> %a) {
 ; CHECK-LABEL: @constantsplatf(
-; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A:%.*]], <half 0xH4900, half 0xH4900, half 0xH4900, half 0xH4900, half 0xH4900, half 0xH4900, half 0xH4900, half 0xH4900>
+; CHECK-NEXT:    [[R:%.*]] = fadd <8 x half> [[A:%.*]], splat (half 0xH4900)
 ; CHECK-NEXT:    ret <8 x half> [[R]]
 ;
   %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
index a22575ccb1ca21..46a622148c8718 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/vecreduce-shuffle.ll
@@ -28,7 +28,7 @@ define i32 @reduceshuffle_onein_v4i32(<4 x i32> %a) {
 define i32 @reduceshuffle_onein_const_v4i32(<4 x i32> %a) {
 ; CHECK-LABEL: @reduceshuffle_onein_const_v4i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[X:%.*]] = xor <4 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -179,7 +179,7 @@ define i32 @reduceshuffle_twoin_extrashuffleuse_v4i32(<4 x i32> %a, <4 x i32> %b
 define i32 @reduceshuffle_twoin_extraotheruse_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_extraotheruse_v4i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 1, i32 4>
-; CHECK-NEXT:    [[X:%.*]] = xor <4 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <4 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[X]])
 ; CHECK-NEXT:    call void @use(<4 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
@@ -234,7 +234,7 @@ define i32 @reduceshuffle_onein_v16i32(<16 x i32> %a) {
 define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
 ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -247,7 +247,7 @@ define i32 @reduceshuffle_onein_ext_v16i32(<16 x i32> %a) {
 define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -260,7 +260,7 @@ define i32 @reduceshuffle_twoin_concat_v16i32(<8 x i32> %a, <8 x i32> %b) {
 define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -273,7 +273,7 @@ define i32 @reduceshuffle_twoin_lowelt_v16i32(<16 x i32> %a, <16 x i32> %b) {
 define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -286,7 +286,7 @@ define i32 @reduceshuffle_twoin_notlowelt_v16i32(<16 x i32> %a, <16 x i32> %b) {
 define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i32> [[S]], splat (i32 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[X]])
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
@@ -299,9 +299,9 @@ define i32 @reduceshuffle_twoin_uneven_v16i32(<16 x i32> %a, <16 x i32> %b) {
 define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_shr1_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i32> [[S]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    [[A2:%.*]] = and <16 x i32> [[A1]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i32> [[A2]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i32> [[S]], splat (i32 15)
+; CHECK-NEXT:    [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537)
+; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535)
 ; CHECK-NEXT:    [[A4:%.*]] = add <16 x i32> [[A3]], [[S]]
 ; CHECK-NEXT:    [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]]
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]])
@@ -320,9 +320,9 @@ define i32 @reduceshuffle_twoin_shr1_v16i32(<16 x i32> %a, <16 x i32> %b) {
 define i32 @reduceshuffle_twoin_shr2_v16i32(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_shr2_v16i32(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>, [[S]]
-; CHECK-NEXT:    [[A2:%.*]] = and <16 x i32> [[A1]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
-; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i32> [[A2]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i32> splat (i32 15), [[S]]
+; CHECK-NEXT:    [[A2:%.*]] = and <16 x i32> [[A1]], splat (i32 65537)
+; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i32> [[A2]], splat (i32 65535)
 ; CHECK-NEXT:    [[A4:%.*]] = add <16 x i32> [[A3]], [[S]]
 ; CHECK-NEXT:    [[A5:%.*]] = xor <16 x i32> [[A4]], [[A3]]
 ; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[A5]])
@@ -365,7 +365,7 @@ define i16 @reduceshuffle_onein_v16i16(<16 x i16> %a) {
 define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
 ; CHECK-LABEL: @reduceshuffle_onein_ext_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
@@ -378,7 +378,7 @@ define i16 @reduceshuffle_onein_ext_v16i16(<16 x i16> %a) {
 define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_concat_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
@@ -391,7 +391,7 @@ define i16 @reduceshuffle_twoin_concat_v16i16(<8 x i16> %a, <8 x i16> %b) {
 define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_lowelt_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
@@ -404,7 +404,7 @@ define i16 @reduceshuffle_twoin_lowelt_v16i16(<16 x i16> %a, <16 x i16> %b) {
 define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_notlowelt_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
@@ -417,7 +417,7 @@ define i16 @reduceshuffle_twoin_notlowelt_v16i16(<16 x i16> %a, <16 x i16> %b) {
 define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_uneven_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 1, i32 3, i32 5, i32 7, i32 17, i32 19, i32 21, i32 8>
-; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT:    [[X:%.*]] = xor <16 x i16> [[S]], splat (i16 -1)
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[X]])
 ; CHECK-NEXT:    ret i16 [[R]]
 ;
@@ -430,9 +430,9 @@ define i16 @reduceshuffle_twoin_uneven_v16i16(<16 x i16> %a, <16 x i16> %b) {
 define i16 @reduceshuffle_twoin_ext_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: @reduceshuffle_twoin_ext_v16i16(
 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i16> [[S]], <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:    [[A2:%.*]] = and <16 x i16> [[A1]], <i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257, i16 257>
-; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i16> [[A2]], <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+; CHECK-NEXT:    [[A1:%.*]] = lshr <16 x i16> [[S]], splat (i16 7)
+; CHECK-NEXT:    [[A2:%.*]] = and <16 x i16> [[A1]], splat (i16 257)
+; CHECK-NEXT:    [[A3:%.*]] = mul nuw <16 x i16> [[A2]], splat (i16 255)
 ; CHECK-NEXT:    [[A4:%.*]] = add <16 x i16> [[A3]], [[S]]
 ; CHECK-NEXT:    [[A5:%.*]] = xor <16 x i16> [[A4]], [[A3]]
 ; CHECK-NEXT:    [[R:%.*]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> [[A5]])
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
index 97608174b524d7..28d6b59e2b7f02 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll
@@ -1617,7 +1617,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
 ; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
 ; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0
 ; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer
-; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 42>, <1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]])
 ; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]])
 ; NO-VEC-COMBINE-NEXT:    ret <1 x i64> [[TMP4]]
 ;
@@ -1626,7 +1626,7 @@ define <1 x i64> @add_v1i64_allonesmask(<1 x i64> %x, i64 %y, i32 zeroext %evl)
 ; VEC-COMBINE-32-NEXT:    [[MASK:%.*]] = shufflevector <1 x i1> [[SPLAT]], <1 x i1> poison, <1 x i32> zeroinitializer
 ; VEC-COMBINE-32-NEXT:    [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0
 ; VEC-COMBINE-32-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer
-; VEC-COMBINE-32-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 42>, <1 x i1> [[MASK]], i32 [[EVL:%.*]])
+; VEC-COMBINE-32-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK]], i32 [[EVL:%.*]])
 ; VEC-COMBINE-32-NEXT:    [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]])
 ; VEC-COMBINE-32-NEXT:    ret <1 x i64> [[TMP4]]
 ;
@@ -1643,7 +1643,7 @@ define <1 x i64> @add_v1i64_anymask(<1 x i64> %x, i64 %y, <1 x i1> %mask, i32 ze
 ; ALL-LABEL: @add_v1i64_anymask(
 ; ALL-NEXT:    [[TMP1:%.*]] = insertelement <1 x i64> poison, i64 [[Y:%.*]], i64 0
 ; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> poison, <1 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 42>, <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; ALL-NEXT:    [[TMP3:%.*]] = call <1 x i64> @llvm.vp.add.v1i64(<1 x i64> [[TMP2]], <1 x i64> splat (i64 42), <1 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; ALL-NEXT:    [[TMP4:%.*]] = call <1 x i64> @llvm.vp.mul.v1i64(<1 x i64> [[X:%.*]], <1 x i64> [[TMP3]], <1 x i1> [[MASK]], i32 [[EVL]])
 ; ALL-NEXT:    ret <1 x i64> [[TMP4]]
 ;
@@ -1669,7 +1669,7 @@ define <4 x i64> @add_v4i64_allonesmask(<4 x i64> %x, i64 %y, i32 zeroext %evl)
 ; NO-VEC-COMBINE-NEXT:    [[MASK:%.*]] = shufflevector <4 x i1> [[SPLAT]], <4 x i1> poison, <4 x i32> zeroinitializer
 ; NO-VEC-COMBINE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0
 ; NO-VEC-COMBINE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>, <4 x i1> [[MASK]], i32 [[EVL:%.*]])
+; NO-VEC-COMBINE-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK]], i32 [[EVL:%.*]])
 ; NO-VEC-COMBINE-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]])
 ; NO-VEC-COMBINE-NEXT:    ret <4 x i64> [[TMP4]]
 ;
@@ -1686,7 +1686,7 @@ define <4 x i64> @add_v4i64_anymask(<4 x i64> %x, i64 %y, <4 x i1> %mask, i32 ze
 ; ALL-LABEL: @add_v4i64_anymask(
 ; ALL-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> poison, i64 [[Y:%.*]], i64 0
 ; ALL-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> poison, <4 x i32> zeroinitializer
-; ALL-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> <i64 42, i64 42, i64 42, i64 42>, <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
+; ALL-NEXT:    [[TMP3:%.*]] = call <4 x i64> @llvm.vp.add.v4i64(<4 x i64> [[TMP2]], <4 x i64> splat (i64 42), <4 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
 ; ALL-NEXT:    [[TMP4:%.*]] = call <4 x i64> @llvm.vp.mul.v4i64(<4 x i64> [[X:%.*]], <4 x i64> [[TMP3]], <4 x i1> [[MASK]], i32 [[EVL]])
 ; ALL-NEXT:    ret <4 x i64> [[TMP4]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
index 21adcdfd29b826..bbdd76c58b58ec 100644
--- a/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/insert-binop-with-constant.ll
@@ -514,7 +514,7 @@ define <2 x i64> @or_constant(i64 %x) {
 define <2 x i64> @or_constant_not_undef_lane(i64 %x) {
 ; CHECK-LABEL: @or_constant_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = or i64 [[X:%.*]], -42
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> <i64 -1, i64 -1>, i64 [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x i64> [[BO]]
 ;
   %ins = insertelement <2 x i64> undef, i64 %x, i32 1
@@ -558,7 +558,7 @@ define <2 x double> @fadd_constant(double %x) {
 define <2 x double> @fadd_constant_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fadd_constant_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fadd double [[X:%.*]], -4.200000e+01
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 1
@@ -580,7 +580,7 @@ define <2 x double> @fsub_constant_op0(double %x) {
 define <2 x double> @fsub_constant_op0_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fsub_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fsub nsz double -4.200000e+01, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 1
@@ -602,7 +602,7 @@ define <2 x double> @fsub_constant_op1(double %x) {
 define <2 x double> @fsub_constant_op1_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fsub_constant_op1_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fsub double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 0
@@ -624,7 +624,7 @@ define <2 x double> @fmul_constant(double %x) {
 define <2 x double> @fmul_constant_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fmul_constant_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fmul double [[X:%.*]], -4.200000e+01
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 1
@@ -646,7 +646,7 @@ define <2 x double> @fdiv_constant_op0(double %x) {
 define <2 x double> @fdiv_constant_op0_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fdiv_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fdiv ninf double 4.200000e+01, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 0
@@ -668,7 +668,7 @@ define <2 x double> @fdiv_constant_op1(double %x) {
 define <2 x double> @fdiv_constant_op1_not_undef_lane(double %x) {
 ; CHECK-LABEL: @fdiv_constant_op1_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = fdiv double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 0
@@ -690,7 +690,7 @@ define <2 x double> @frem_constant_op0(double %x) {
 define <2 x double> @frem_constant_op0_not_undef_lane(double %x) {
 ; CHECK-LABEL: @frem_constant_op0_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = frem double -4.200000e+01, [[X:%.*]]
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 1
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 1
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 1
@@ -712,7 +712,7 @@ define <2 x double> @frem_constant_op1(double %x) {
 define <2 x double> @frem_constant_op1_not_undef_lane(double %x) {
 ; CHECK-LABEL: @frem_constant_op1_not_undef_lane(
 ; CHECK-NEXT:    [[BO_SCALAR:%.*]] = frem nnan double [[X:%.*]], 4.200000e+01
-; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000>, double [[BO_SCALAR]], i64 0
+; CHECK-NEXT:    [[BO:%.*]] = insertelement <2 x double> splat (double 0x7FF8000000000000), double [[BO_SCALAR]], i64 0
 ; CHECK-NEXT:    ret <2 x double> [[BO]]
 ;
   %ins = insertelement <2 x double> undef, double %x, i32 0
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
index 74a58c8d313611..4c70438b2d6e4e 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll
@@ -126,7 +126,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8>
-; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
 ; SSE-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -135,7 +135,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; AVX-LABEL: @PR35454_1(
 ; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
 ; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -156,7 +156,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16>
-; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
 ; SSE-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -165,7 +165,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; AVX-LABEL: @PR35454_2(
 ; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
-; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
 ; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
index c423053a9a4839..77b44d0e40e144 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll
@@ -220,7 +220,7 @@ define <4 x i32> @shuf_srem_v4i32_poison(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: define <4 x i32> @shuf_srem_v4i32_poison(
 ; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[SREM0:%.*]] = srem <4 x i32> [[A1]], [[A0]]
-; CHECK-NEXT:    [[SREM1:%.*]] = srem <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[A0]]
+; CHECK-NEXT:    [[SREM1:%.*]] = srem <4 x i32> splat (i32 1), [[A0]]
 ; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[SREM0]], <4 x i32> [[SREM1]], <4 x i32> <i32 0, i32 poison, i32 6, i32 3>
 ; CHECK-NEXT:    ret <4 x i32> [[R]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
index c8c9aa161ae289..83e088aaa4e27c 100644
--- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll
@@ -185,7 +185,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8>
-; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
 ; SSE-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -195,7 +195,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) {
 ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] {
 ; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
-; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], splat (i8 1)
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
 ; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -217,7 +217,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16>
-; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
 ; SSE-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
 ; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
@@ -227,7 +227,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) {
 ; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] {
 ; AVX-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16>
 ; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
-; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], splat (i16 1)
 ; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
 ; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>